From ade7980b250d0d679355d9583edd03deed871ff2 Mon Sep 17 00:00:00 2001 From: 魏曹先生 <1992414357@qq.com> Date: Mon, 9 Feb 2026 19:08:30 +0800 Subject: Rename parser to converter --- Cargo.lock | 4 +- Cargo.toml | 4 +- converter/Cargo.toml | 11 + converter/src/bin/mdialogc.rs | 95 +++ converter/src/error.rs | 121 ++++ converter/src/lib.rs | 5 + converter/src/macros.rs | 33 + converter/src/parse.rs | 1278 ++++++++++++++++++++++++++++++++++++++ converter/src/syntax_checker.rs | 201 ++++++ converter/src/utils.rs | 1 + converter/src/utils/path_fmt.rs | 123 ++++ converter/usage.txt | 6 + converter/version.txt | 2 + parser/Cargo.toml | 11 - parser/src/bin/mdialogp.rs | 95 --- parser/src/error.rs | 121 ---- parser/src/lib.rs | 5 - parser/src/macros.rs | 33 - parser/src/parse.rs | 1279 --------------------------------------- parser/src/syntax_checker.rs | 201 ------ parser/src/utils.rs | 1 - parser/src/utils/path_fmt.rs | 123 ---- src/lib.rs | 6 +- usage.txt | 6 - version.txt | 1 - 25 files changed, 1883 insertions(+), 1883 deletions(-) create mode 100644 converter/Cargo.toml create mode 100644 converter/src/bin/mdialogc.rs create mode 100644 converter/src/error.rs create mode 100644 converter/src/lib.rs create mode 100644 converter/src/macros.rs create mode 100644 converter/src/parse.rs create mode 100644 converter/src/syntax_checker.rs create mode 100644 converter/src/utils.rs create mode 100644 converter/src/utils/path_fmt.rs create mode 100644 converter/usage.txt create mode 100644 converter/version.txt delete mode 100644 parser/Cargo.toml delete mode 100644 parser/src/bin/mdialogp.rs delete mode 100644 parser/src/error.rs delete mode 100644 parser/src/lib.rs delete mode 100644 parser/src/macros.rs delete mode 100644 parser/src/parse.rs delete mode 100644 parser/src/syntax_checker.rs delete mode 100644 parser/src/utils.rs delete mode 100644 parser/src/utils/path_fmt.rs delete mode 100644 usage.txt delete mode 100644 version.txt diff --git a/Cargo.lock b/Cargo.lock index 
c2cc16b..d2f6e4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,12 +89,12 @@ name = "markdialog" version = "0.1.0" dependencies = [ "built_res", - "markdialog_parser", + "markdialog_converter", "markdialog_player", ] [[package]] -name = "markdialog_parser" +name = "markdialog_converter" version = "0.0.0" dependencies = [ "colored", diff --git a/Cargo.toml b/Cargo.toml index ee8b721..a6ce65a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,10 +7,10 @@ edition = "2024" members = [ "built_res", # Built Resources "player", # Dialog Player - "parser" # Markdown Parser + "converter" # Markdown Converter ] [dependencies] built_res = { path = "built_res" } markdialog_player = { path = "player" } -markdialog_parser = { path = "parser" } +markdialog_converter = { path = "converter" } diff --git a/converter/Cargo.toml b/converter/Cargo.toml new file mode 100644 index 0000000..982d18a --- /dev/null +++ b/converter/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "markdialog_converter" +workspaces.version = true +edition = "2024" + +[dependencies] +colored = "3.0" +strip-ansi-escapes = "0.2.1" +unicode-width = "0.2" +regex = "1.12" +sha2 = "0.10" diff --git a/converter/src/bin/mdialogc.rs b/converter/src/bin/mdialogc.rs new file mode 100644 index 0000000..ebe7804 --- /dev/null +++ b/converter/src/bin/mdialogc.rs @@ -0,0 +1,95 @@ +use markdialog_converter::{ + error::{Exit, handle_exit}, + parse::parse, + special_argument, special_flag, +}; +use std::{path::PathBuf, str::FromStr}; + +fn process() -> Result<(), Exit> { + let mut args: Vec = std::env::args().skip(1).collect(); + + let help = special_flag!(args, "--help") || special_flag!(args, "-h"); + let version = special_flag!(args, "--version") || special_flag!(args, "-v"); + + if version { + let version = include_str!("../../version.txt"); + println!("{}", version.trim()); + return Err(Exit::Code(0)); + } + + if help || args.len() < 1 { + let usage = include_str!("../../usage.txt"); + println!("{}", usage.trim()); + return 
Err(Exit::Code(0)); + } + + let input_file = get_input_file(&mut args)?; + let output_ir_file = get_output_ir_file(&mut args)?.unwrap_or_else(|| { + let mut path = input_file.clone(); + if let Some(file_name) = path.file_name() { + let mut new_name = std::ffi::OsString::new(); + new_name.push(file_name); + // Change extension to .dialog + path.set_extension("dialog"); + } else { + path.set_file_name("ir.dialog"); + } + path + }); + + parse(input_file, output_ir_file)?; + + Ok(()) +} + +fn get_input_file(args: &mut Vec) -> Result { + let input = match special_argument!(args, "--input") { + Some(i) => i, + None => match special_argument!(args, "-i") { + Some(i) => i, + None => { + eprintln!("Missing required input argument. Use --input or -i."); + std::process::exit(2); + } + }, + }; + + let input_file = PathBuf::from_str(&input).map_err(|_| { + eprintln!("Invalid file path `{}`!", input); + Exit::Code(2) + })?; + + Ok(input_file) +} + +fn get_output_ir_file(args: &mut Vec) -> Result, Exit> { + let input = match special_argument!(args, "--output") { + Some(i) => Some(i), + None => match special_argument!(args, "-o") { + Some(i) => Some(i), + None => None, + }, + }; + + match input { + Some(i) => { + let input_file = PathBuf::from_str(&i).map_err(|_| { + eprintln!("Invalid file path `{}`!", i); + return Exit::Code(2); + })?; + Ok(Some(input_file)) + } + None => Ok(None), + } +} + +fn main() { + // Init colored + #[cfg(windows)] + colored::control::set_virtual_terminal(true).unwrap(); + + match process() { + Ok(_) => {} + Err(e) => handle_exit(e), + } +} diff --git a/converter/src/error.rs b/converter/src/error.rs new file mode 100644 index 0000000..b594165 --- /dev/null +++ b/converter/src/error.rs @@ -0,0 +1,121 @@ +use std::{i64, path::PathBuf, process::exit}; + +use colored::Colorize; +use unicode_width::UnicodeWidthStr; + +#[derive(Debug)] +pub enum Exit { + Code(i32), + IoError(std::io::Error), + FileNotFound(PathBuf), + SyntaxError { + content: String, + reason: 
String, + line: i64, + begin: i64, + end: i64, + }, + DuplicateMarker(String), + CycleDependency(PathBuf), +} + +impl From for Exit { + fn from(error: std::io::Error) -> Self { + Exit::IoError(error) + } +} + +pub fn handle_exit(e: Exit) { + match e { + Exit::Code(code) => exit(code), + Exit::IoError(error) => print_parse_error(error.to_string()), + Exit::FileNotFound(path_buf) => { + eprintln!("File `{}` not found!", path_buf.display()); + exit(1) + } + Exit::SyntaxError { + content, + reason, + line, + begin, + end, + } => { + print_syntax_error(content, reason, line, begin, end); + } + Exit::DuplicateMarker(marker) => { + eprintln!("Duplicate marker `{}` found!", marker); + exit(1) + } + Exit::CycleDependency(dialog) => { + eprintln!("Dialog `{}` depends on itself!", dialog.display()); + exit(1) + } + } +} + +fn print_parse_error(content: impl AsRef) { + eprintln!("Parse Error !"); + eprintln!("{}", content.as_ref().trim()); + exit(1); +} + +macro_rules! line { + ($line:expr, $N:expr) => { + if $line + $N <= 0 { + " ".to_string() + } else { + ($line + $N).to_string() + } + }; +} + +pub fn print_syntax_error(content: String, reason: String, line: i64, begin: i64, end: i64) { + let content_len = content.width() as i64; + let end = end.clamp(begin, content_len); + + eprintln!("{}", "Parse Failed: Syntax Error".bright_yellow()); + eprintln!("{}{}", line!(line, -1), "|"); + + let before: String = content.chars().take(begin.max(0) as usize).collect(); + let highlight_len = (end - begin).max(1) as usize; + let highlight: String = content + .chars() + .skip(begin.max(0) as usize) + .take(highlight_len) + .collect(); + let after: String = content + .chars() + .skip((begin.max(0) + highlight_len as i64) as usize) + .collect(); + + eprintln!( + "{}{} {}{}{}", + line.to_string().cyan(), + "|".cyan(), + before.cyan(), + highlight.bright_cyan(), + after.cyan() + ); + + let prefix_chars: String = content.chars().take(begin.max(0) as usize).collect(); + let prefix_width = 
/// Strips a boolean flag from an argument list.
///
/// `$args` must be a `Vec<String>` (or a mutable reference to one) and `$flag`
/// a string slice. Every element equal to `$flag` is removed from `$args`; the
/// macro evaluates to `true` when at least one element was removed.
#[macro_export]
macro_rules! special_flag {
    ($args:expr, $flag:expr) => {{
        let wanted = $flag;
        let before = $args.len();
        $args.retain(|candidate| candidate != wanted);
        $args.len() != before
    }};
}

/// Extracts the value following a flag from an argument list.
///
/// Looks for the first element of `$args` equal to `$flag`:
///
/// * if another element follows it, both are removed and the macro evaluates
///   to `Some(value)`;
/// * if the flag is the last element, only the flag is removed and the macro
///   evaluates to `None`;
/// * if the flag is absent, `$args` is left untouched and the result is `None`.
///
/// Only the first occurrence is consumed; later occurrences stay in `$args`.
#[macro_export]
macro_rules! special_argument {
    ($args:expr, $flag:expr) => {{
        let wanted = $flag;
        match $args.iter().position(|candidate| candidate == wanted) {
            Some(at) if at + 1 < $args.len() => {
                // Remove the value first so the flag's index stays valid.
                let taken: String = $args.remove(at + 1);
                $args.remove(at);
                Some(taken)
            }
            Some(at) => {
                $args.remove(at);
                None
            }
            None => None,
        }
    }};
}
/// Extracts the target of an include directive such as `[[Dialog.md]]`.
///
/// The line is trimmed first, so the directive may be indented or followed by
/// whitespace. It must make up the whole line: anything before `[[` or after
/// `]]` yields `None`.
///
/// Whitespace just inside the brackets is ignored, so `[[ Dialog.md ]]`
/// resolves to `Dialog.md` instead of a path with leading and trailing spaces.
fn extract_include(line: &str) -> Option<&str> {
    let inner = line.trim().strip_prefix("[[")?.strip_suffix("]]")?;
    Some(inner.trim())
}
Trim each line +pub fn clean_markdown(i: String) -> Result { + let lines = i.lines(); + let mut cleaned = Vec::new(); + + for line in lines { + if line.starts_with('>') { + continue; + } + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + cleaned.push(trimmed.to_string()); + } + + Ok(cleaned.join("\n")) +} + +#[cfg(test)] +mod test_clean_markdown { + use super::*; + + #[test] + fn test_clean_markdown_removes_blockquotes() { + let input = "> This is a blockquote\nNormal text\n> Another blockquote".to_string(); + let expected = "Normal text".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_removes_empty_lines() { + let input = "Line 1\n\n\nLine 2\n\n".to_string(); + let expected = "Line 1\nLine 2".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_trims_lines() { + let input = " Line 1 \n\tLine 2\t\n".to_string(); + let expected = "Line 1\nLine 2".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_combined() { + let input = "> Blockquote\n\n Line 1 \n> Another\n\nLine 2\n\n".to_string(); + let expected = "Line 1\nLine 2".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_empty_input() { + let input = "".to_string(); + let expected = "".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_only_blockquotes() { + let input = "> Quote 1\n> Quote 2".to_string(); + let expected = "".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, 
expected); + } + + #[test] + fn test_clean_markdown_only_whitespace() { + let input = " \n\t\n ".to_string(); + let expected = "".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } +} + +/// Fix jump syntax in each line +/// 1. Correct the following syntax +/// ```ignore +/// - It's [Item](#Mark) +/// > corrected to +/// - It's Item [](#Mark) +/// ``` +/// +/// 2. If there are multiple options, take the first one +/// ```ignore +/// - There might be two options: [A](#A) and [B](#B)! +/// > corrected to +/// - There might be two options: A and B! [](#A) +/// ``` +pub fn fix_mark_jump(i: String) -> Result { + let mut result = String::new(); + + for line in i.lines() { + let (processed_content, first_link_dest) = helper_process_line_content(line); + let processed_line = helper_format_line_with_link(processed_content, first_link_dest); + let final_line = helper_convert_ordered_list_marker(processed_line); + + result.push_str(&final_line); + result.push('\n'); + } + + if result.ends_with('\n') { + result.pop(); + } + + Ok(result) +} + +/// Process line content, extract link text and return the first link target +/// +/// # Examples +/// +/// ``` +/// use markdialog_parser::parse::helper_process_line_content; +/// +/// // Single link +/// let (content, dest) = helper_process_line_content("This is a [Link](#target) Example"); +/// assert_eq!(content, "This is a Link Example"); +/// assert_eq!(dest, Some("target".to_string())); +/// +/// // Extract the first link +/// let (content, dest) = helper_process_line_content("First [link1](#target1) and second [link2](#target2)"); +/// assert_eq!(content, "First link1 and second link2"); +/// assert_eq!(dest, Some("target1".to_string())); +/// +/// // No link +/// let (content, dest) = helper_process_line_content("Text without link"); +/// assert_eq!(content, "Text without link"); +/// assert_eq!(dest, None); +/// +/// // Invalid link +/// let (content, 
/// Process line content, extract link text and return the first link target
///
/// Every `[text](#target)` in `line` is replaced by its `text`. The target of
/// the first link found is returned alongside the flattened content; targets
/// of later links are discarded.
///
/// Lines that start with `![` (images) are returned unchanged with no target.
///
/// A `[` that does not start a valid link is kept, but the characters that
/// [`helper_parse_link`] consumed while trying to parse it are lost (see the
/// "Invalid link" example below).
///
/// # Examples
///
/// ```
/// use markdialog_converter::parse::helper_process_line_content;
///
/// // Single link
/// let (content, dest) = helper_process_line_content("This is a [Link](#target) Example");
/// assert_eq!(content, "This is a Link Example");
/// assert_eq!(dest, Some("target".to_string()));
///
/// // Extract the first link
/// let (content, dest) = helper_process_line_content("First [link1](#target1) and second [link2](#target2)");
/// assert_eq!(content, "First link1 and second link2");
/// assert_eq!(dest, Some("target1".to_string()));
///
/// // No link
/// let (content, dest) = helper_process_line_content("Text without link");
/// assert_eq!(content, "Text without link");
/// assert_eq!(dest, None);
///
/// // Invalid link
/// let (content, dest) = helper_process_line_content("Invalid [link format");
/// assert_eq!(content, "Invalid [");
/// assert_eq!(dest, None);
///
/// // Empty
/// let (content, dest) = helper_process_line_content("");
/// assert_eq!(content, "");
/// assert_eq!(dest, None);
///
/// // Link target contains spaces and extra # symbols
/// let (content, dest) = helper_process_line_content("Link[text](# target#)");
/// assert_eq!(content, "Linktext");
/// assert_eq!(dest, Some("target".to_string()));
/// ```
pub fn helper_process_line_content(line: &str) -> (String, Option<String>) {
    // Image lines are passed through untouched.
    if line.starts_with("![") {
        return (line.to_string(), None);
    }

    let mut processed = String::with_capacity(line.len());
    let mut chars = line.chars().peekable();
    let mut first_link_dest: Option<String> = None;

    // `helper_parse_link` advances `chars` itself, so a plain `for` loop
    // cannot be used here.
    while let Some(ch) = chars.next() {
        if ch != '[' {
            processed.push(ch);
            continue;
        }

        match helper_parse_link(&mut chars) {
            Some((link_text, link_dest, _)) => {
                processed.push_str(&link_text);
                // Only the first target on a line is kept.
                if first_link_dest.is_none() {
                    first_link_dest = Some(link_dest);
                }
            }
            // Not a link: keep the bracket. Whatever the parser consumed
            // while looking for the link is dropped.
            None => processed.push(ch),
        }
    }

    (processed, first_link_dest)
}

/// Parse possible Markdown links, return (link text, link target, remaining character iterator)
///
/// `chars` must be positioned just after the opening `[`. The function
/// advances `chars` in place; the iterator in the returned tuple is a clone of
/// `chars` in its final position.
///
/// The target is cleaned: surrounding whitespace is trimmed and every space
/// and `#` inside it is removed. A target without a closing `)` is accepted
/// and runs to the end of the input.
///
/// Returns `None` when the text is not followed by `(#`. In that case `chars`
/// has already been advanced past whatever was inspected.
///
/// # Examples
///
/// ```
/// use markdialog_converter::parse::helper_parse_link;
///
/// // Standard Link
/// let mut chars = "[Link](#target)".chars().peekable();
/// chars.next(); // Skip '['
/// let result = helper_parse_link(&mut chars);
/// assert!(result.is_some());
/// let (text, dest, _) = result.unwrap();
/// assert_eq!(text, "Link");
/// assert_eq!(dest, "target");
///
/// // Link text contains spaces
/// let mut chars = "[Link text](#target)".chars().peekable();
/// chars.next();
/// let result = helper_parse_link(&mut chars);
/// assert!(result.is_some());
/// let (text, dest, _) = result.unwrap();
/// assert_eq!(text, "Link text");
/// assert_eq!(dest, "target");
///
/// // Link target contains spaces and extra # symbols
/// let mut chars = "[text](# target#)".chars().peekable();
/// chars.next();
/// let result = helper_parse_link(&mut chars);
/// assert!(result.is_some());
/// let (text, dest, _) = result.unwrap();
/// assert_eq!(text, "text");
/// assert_eq!(dest, "target");
///
/// // Invalid format: missing ']'
/// let mut chars = "[Link(#target)".chars().peekable();
/// chars.next();
/// let result = helper_parse_link(&mut chars);
/// assert!(result.is_none());
///
/// // Invalid format: missing '(#'
/// let mut chars = "[Link]target)".chars().peekable();
/// chars.next();
/// let result = helper_parse_link(&mut chars);
/// assert!(result.is_none());
///
/// // Missing ')' is tolerated: the target runs to the end of the input
/// let mut chars = "[Link](#target".chars().peekable();
/// chars.next();
/// let result = helper_parse_link(&mut chars);
/// assert!(result.is_some());
/// let (text, dest, _) = result.unwrap();
/// assert_eq!(text, "Link");
/// assert_eq!(dest, "target");
/// ```
pub fn helper_parse_link<'a>(
    chars: &mut std::iter::Peekable<std::str::Chars<'a>>,
) -> Option<(String, String, std::iter::Peekable<std::str::Chars<'a>>)> {
    // Link text: everything up to the closing ']'. `take_while` also consumes
    // the ']' itself (or runs to the end of input when there is none).
    let link_text: String = chars.by_ref().take_while(|&ch| ch != ']').collect();

    // A jump link must continue with "(#". The `||` short-circuits, so the
    // second character is only consumed when the first one was '('.
    if chars.next() != Some('(') || chars.next() != Some('#') {
        return None;
    }

    // Target: everything up to the closing ')' (consumed) or end of input.
    let link_dest: String = chars.by_ref().take_while(|&ch| ch != ')').collect();

    let cleaned_dest = link_dest.trim().replace(' ', "").replace('#', "");

    Some((link_text, cleaned_dest, chars.clone()))
}
/// If there is a link dest, add a jump marker at the end of the line
///
/// With a non-blank `link_dest` the result is
/// `"<content without trailing whitespace> [](#<trimmed dest>)"`, and leading
/// whitespace of the combined string is removed as well. With `None`, an
/// empty dest or a whitespace-only dest, `content` is returned unchanged.
///
/// # Examples
///
/// ```
/// use markdialog_converter::parse::helper_format_line_with_link;
///
/// // With a link dest
/// let content = "Some content".to_string();
/// let link_dest = Some("target".to_string());
/// let result = helper_format_line_with_link(content, link_dest);
/// assert_eq!(result, "Some content [](#target)");
///
/// // With empty content and a link dest
/// let content = "".to_string();
/// let link_dest = Some("target".to_string());
/// let result = helper_format_line_with_link(content, link_dest);
/// assert_eq!(result, "[](#target)");
///
/// // With trailing spaces in content
/// let content = "Content with spaces   ".to_string();
/// let link_dest = Some("target".to_string());
/// let result = helper_format_line_with_link(content, link_dest);
/// assert_eq!(result, "Content with spaces [](#target)");
///
/// // Without a link dest
/// let content = "Some content".to_string();
/// let link_dest = None;
/// let result = helper_format_line_with_link(content, link_dest);
/// assert_eq!(result, "Some content");
///
/// // With an empty link dest
/// let content = "Some content".to_string();
/// let link_dest = Some("".to_string());
/// let result = helper_format_line_with_link(content, link_dest);
/// assert_eq!(result, "Some content");
///
/// // With whitespace-only link dest
/// let content = "Some content".to_string();
/// let link_dest = Some("   ".to_string());
/// let result = helper_format_line_with_link(content, link_dest);
/// assert_eq!(result, "Some content");
/// ```
pub fn helper_format_line_with_link(content: String, link_dest: Option<String>) -> String {
    // A missing or blank destination means there is nothing to append.
    let Some(dest) = link_dest
        .as_deref()
        .map(str::trim)
        .filter(|dest| !dest.is_empty())
    else {
        return content;
    };

    // The outer `trim` strips the leading space left behind by empty content.
    format!("{} [](#{})", content.trim_end(), dest)
        .trim()
        .to_string()
}
/// Convert ordered list markers to unordered list markers
///
/// A line whose first non-whitespace characters are one or more ASCII digits
/// followed by `". "` is rewritten to `"- "` plus the rest of the line.
/// Leading indentation is dropped in that case. Any other line is returned
/// unchanged, including its original whitespace.
///
/// # Examples
///
/// ```
/// use markdialog_converter::parse::helper_convert_ordered_list_marker;
///
/// // Basic conversion
/// let input = "1. First item".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "- First item");
///
/// // Multi-digit numbers
/// let input = "10. Tenth item".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "- Tenth item");
///
/// // With leading spaces
/// let input = "  2. Second item".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "- Second item");
///
/// // Not an ordered list marker (no space after the dot)
/// let input = "1.Not a list".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "1.Not a list");
///
/// // Extra spacing after the marker is preserved
/// let input = "1.  Extra space".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "-  Extra space");
///
/// // Already unordered list
/// let input = "- Already unordered".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "- Already unordered");
///
/// // Regular text
/// let input = "This is not a list".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "This is not a list");
///
/// // Empty string
/// let input = "".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "");
///
/// // Only whitespace
/// let input = "   ".to_string();
/// let result = helper_convert_ordered_list_marker(input);
/// assert_eq!(result, "   ");
/// ```
pub fn helper_convert_ordered_list_marker(line: String) -> String {
    let trimmed = line.trim_start();
    let after_digits = trimmed.trim_start_matches(|c: char| c.is_ascii_digit());

    // At least one digit must have been stripped for this to be a list marker.
    if after_digits.len() < trimmed.len() {
        if let Some(content) = after_digits.strip_prefix(". ") {
            return format!("- {}", content);
        }
    }

    line
}
let input = "- [Start](#target) with link".to_string(); + let expected = "- Start with link [](#target)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_link_in_middle() { + let input = "- Text [middle](#target) text".to_string(); + let expected = "- Text middle text [](#target)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_ordered_list_conversion() { + let input = "1. [Item](#target)".to_string(); + let expected = "- Item [](#target)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_ordered_list_multiple_digits() { + let input = "10. [Tenth](#target) item".to_string(); + let expected = "- Tenth item [](#target)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_mixed_ordered_and_unordered() { + let input = "1. [First](#first)\n- [Second](#second)\n2. 
[Third](#third)".to_string(); + let expected = "- First [](#first)\n- Second [](#second)\n- Third [](#third)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_invalid_link_format() { + let input = "- Invalid [link format".to_string(); + let expected = "- Invalid [".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_link_with_spaces_in_target() { + let input = "- Link [text](# target#)".to_string(); + let expected = "- Link text [](#target)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_empty_link_text() { + let input = "- [](#target)".to_string(); + let expected = "- [](#target)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_only_whitespace() { + let input = " ".to_string(); + let expected = " ".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_fix_mark_jump_complex_multiple_links() { + let input = "- Choose [A](#A), [B](#B), or [C](#C)!".to_string(); + let expected = "- Choose A, B, or C! 
[](#A)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } +} + +/// Replace marker names: replace heading text and link anchors with corresponding SHA256 +/// +/// Example: +/// ```ignore +/// # Original text +/// # Chapter Title +/// - Jump to [Chapter Title](#Chapter Title) +/// +/// # After processing +/// # a1b2c3d4 +/// - Jump to [](#a1b2c3d4) +/// ``` +pub fn replace_marker_name(i: String) -> Result { + let mut result = i; + + let heading_re = Regex::new(r"^(#{1,5})\s+(.+)$").unwrap(); + let mut heading_map = std::collections::HashMap::new(); + + for line in result.lines() { + if let Some(caps) = heading_re.captures(line) { + let heading_text = caps[2].trim().to_string(); + let hash = format!("{:x}", Sha256::digest(heading_text.as_bytes())); + let short_hash = &hash[..8]; + heading_map.insert(heading_text, short_hash.to_string()); + } + } + + let mut lines: Vec = Vec::new(); + for line in result.lines() { + if let Some(caps) = heading_re.captures(line) { + let level = &caps[1]; + let heading_text = caps[2].trim(); + + if let Some(hash) = heading_map.get(heading_text) { + lines.push(format!("{} {}", level, hash)); + } else { + lines.push(line.to_string()); + } + } else { + lines.push(line.to_string()); + } + } + result = lines.join("\n"); + + let link_re = Regex::new(r"\[\]\(#([^)]+)\)").unwrap(); + result = link_re + .replace_all(&result, |caps: ®ex::Captures| { + let anchor_name = &caps[1]; + if let Some(hash) = heading_map.get(anchor_name) { + format!("[](#{})", hash) + } else { + let hash = format!("{:x}", Sha256::digest(anchor_name.as_bytes())); + let short_hash = &hash[..8]; + format!("[](#{})", short_hash) + } + }) + .to_string(); + + Ok(result) +} + +/// Split content into Step + Sentence structure +pub fn convert_to_step_sentence_structure(input: String) -> Result { + let mut result = String::new(); + let mut current_marker = String::new(); + let mut current_step_id = 0; + 
let mut current_character = String::new(); + let mut has_no_switch_flag = false; + + let mut code_record_mode = false; + let mut option_record_mode = false; + + let mut sentences_buffer = String::new(); + for line in input.split("\n") { + // Record code + if code_record_mode { + // If code block marker is found again, end code recording + if line.starts_with("```") && code_record_mode { + sentences_buffer.push_str("\n"); + code_record_mode = false; + continue; + } + sentences_buffer.push_str(format!("`{}`", line).as_str()); + continue; + } + + // Record options + if option_record_mode { + // Still an option, continue appending + if line.starts_with("- ") { + let (sentence, next) = helper_get_jump_from_line(line); + let next = if let Some(next) = next { + format!("->[#{}_0]", next) + } else { + next_flag(current_marker.as_str(), current_step_id) + }; + let option_line = format!( + "{}[{}]{}", + character(¤t_character, has_no_switch_flag), + sentence, + next + ); + sentences_buffer.push_str(option_line.as_str()); + sentences_buffer.push('\n'); + continue; + } else { + // When ending option recording, create and advance one Step + result.push_str(step_line(current_marker.as_str(), current_step_id).as_str()); + result.push('\n'); + result.push_str(sentences_buffer.as_str()); + sentences_buffer.clear(); + current_step_id += 1; + // Clean "Has no switch flag" + has_no_switch_flag = false; + // Close option mode + option_record_mode = false; + // Do not continue here, proceed to process subsequent content + } + } + + // Refresh heading + if helper_is_marker(line) { + current_marker = helper_read_maker(line).to_string(); + current_step_id = 0; + continue; + } + + // Refresh character + if helper_is_character(line) { + let (character, no_switch_flag) = helper_read_character(line); + current_character = character.to_string(); + has_no_switch_flag = no_switch_flag; + continue; + } + + // Image recording + if line.starts_with('!') { + sentences_buffer.push_str(line); + 
sentences_buffer.push('\n'); + continue; + } + + // Start code recording + if line.starts_with("```") && !code_record_mode { + code_record_mode = true; + continue; + } + + // Option recording + if line.starts_with("- ") { + let (sentence, next) = helper_get_jump_from_line(line); + let next = if let Some(next) = next { + format!("->[#{}_0]", next) + } else { + next_flag(current_marker.as_str(), current_step_id) + }; + let option_line = format!( + "{}[{}]{}", + character(¤t_character, has_no_switch_flag), + sentence, + next + ); + sentences_buffer.push_str(option_line.as_str()); + sentences_buffer.push('\n'); + + // Start option recording mode + if !option_record_mode { + option_record_mode = true; + } + continue; + } + + // Normal sentence + let (sentence, next) = helper_get_jump_from_line(line); + let next = if let Some(next) = next { + format!("->[#{}_0]", next) + } else { + next_flag(current_marker.as_str(), current_step_id) + }; + let sentence_line = format!( + "{}[{}]{}", + character(¤t_character, has_no_switch_flag), + sentence, + next + ); + has_no_switch_flag = false; + + // Create and advance one Step + result.push_str(step_line(current_marker.as_str(), current_step_id).as_str()); + result.push('\n'); + result.push_str(sentences_buffer.as_str()); + sentences_buffer.clear(); + result.push_str(sentence_line.as_str()); + result.push('\n'); + current_step_id += 1; + } + + Ok(result) +} + +pub fn character(character: &str, has_no_switch_flag: bool) -> String { + let flag = if has_no_switch_flag { "*" } else { "" }; + format!("[{}{}{}]:", &flag, character, &flag) +} + +pub fn step_name(marker: &str, current_id: i64) -> String { + format!("{}_{}", marker, current_id) +} + +pub fn step_line(marker: &str, current_id: i64) -> String { + format!("@@@@@@@@@@ {}_{}", marker, current_id) +} + +pub fn next_flag(marker: &str, current_id: i64) -> String { + format!("->[#{}_{}]", marker, current_id + 1) +} + +pub fn helper_is_marker(line: &str) -> bool { + 
line.starts_with("# ") + || line.starts_with("## ") + || line.starts_with("### ") + || line.starts_with("#### ") + || line.starts_with("##### ") +} + +pub fn helper_read_maker(line: &str) -> &str { + let trimmed = line.trim_start(); + if trimmed.starts_with('#') { + if trimmed.starts_with("# ") + || trimmed.starts_with("## ") + || trimmed.starts_with("### ") + || trimmed.starts_with("#### ") + || trimmed.starts_with("##### ") + { + let parts: Vec<&str> = trimmed.splitn(2, ' ').collect(); + if parts.len() == 2 { + return parts[1].trim(); + } + } + } + "" +} + +pub fn helper_is_character(line: &str) -> bool { + line.starts_with("######") +} + +pub fn helper_read_character(line: &str) -> (&str, bool) { + let trimmed = line.trim_start(); + if trimmed.starts_with("######") { + let parts: Vec<&str> = trimmed.splitn(2, ' ').collect(); + if parts.len() == 2 { + let character = parts[1].trim(); + if character.starts_with('*') && character.ends_with('*') { + let trimmed = character.trim_matches('*'); + return (trimmed.trim(), true); + } else { + return (character.trim(), false); + } + } + } + ("", false) +} + +pub fn helper_get_jump_from_line(line: &str) -> (String, Option) { + let pattern = r"\[\]\(#([^)]+)\)$"; + let re = Regex::new(pattern).unwrap(); + + if let Some(caps) = re.captures(line.trim_end()) { + let target = caps.get(1).unwrap().as_str(); + let line_without_jump = line + .trim_end() + .replace(&format!(" [](#{})", target), "") + .to_string(); + return ( + line_without_jump.trim_start_matches("- ").to_string(), + Some(format!("{}", target)), + ); + } + + (line.trim_start_matches("- ").to_string(), None) +} + +/// Strip all jumps that have not appeared +pub fn strip_invalid_jump(input: String) -> Result { + let lines: Vec<&str> = input.lines().collect(); + let mut valid_ids = std::collections::HashSet::new(); + + for line in &lines { + if line.starts_with("@@@@@@@@@@ ") { + let id = line.trim_start_matches("@@@@@@@@@@ ").trim(); + 
valid_ids.insert(id.to_string()); + } + } + + let mut result_lines = Vec::new(); + let link_re = Regex::new(r"\[#([^)]+)\]").unwrap(); + + for line in lines { + let processed_line = link_re.replace_all(line, |caps: ®ex::Captures| { + let id = &caps[1]; + if valid_ids.contains(id) { + format!("[#{}]", id) + } else { + "[]".to_string() + } + }); + result_lines.push(processed_line.to_string()); + } + + Ok(result_lines.join("\n")) +} + +/// Convert image lines to code lines +pub fn convert_image_to_code(input: String) -> Result { + let mut result = String::new(); + let lines: Vec<&str> = input.lines().collect(); + let image_re = Regex::new(r"^!\[[^\]]*\]\(([^)]+)\)$").unwrap(); + + for line in lines { + if let Some(caps) = image_re.captures(line) { + let image_path = caps.get(1).unwrap().as_str(); + result.push_str(&format!("`image \"{}\"`\n", image_path)); + } else { + result.push_str(line); + result.push('\n'); + } + } + + // Remove trailing newline if present + if result.ends_with('\n') { + result.pop(); + } + + Ok(result) +} + +/// Apply code lines to sentences +pub fn apply_code_lines(input: String) -> Result { + let mut out = String::new(); + let lines: Vec<&str> = input.lines().collect(); + + let mut i = 0; + while i < lines.len() { + let line = lines[i]; + + if !line.trim_start().starts_with('`') { + out.push_str(line); + out.push('\n'); + i += 1; + continue; + } + + let mut code_buf = String::new(); + while i < lines.len() && { + let line: &str = lines[i]; + line.trim_start().starts_with('`') + } { + code_buf.push_str(lines[i].trim()); + i += 1; + } + + if i >= lines.len() + || !{ + let line: &str = lines[i]; + line.trim_start().starts_with('[') + } + { + continue; + } + + if i + 1 < lines.len() && { + let line: &str = lines[i + 1]; + line.trim_start().starts_with('[') + } { + continue; + } + + let merged = helper_merge_code_into_sentence(&code_buf, lines[i]); + out.push_str(&merged); + out.push('\n'); + i += 1; + } + + Ok(out) +} + +fn 
helper_merge_code_into_sentence(code: &str, sentence: &str) -> String { + if let Some(start) = sentence.find(":[") { + if let Some(_) = sentence[start + 2..].find(']') { + let content_start = start + 2; + + let mut result = String::new(); + result.push_str(&sentence[..content_start]); + result.push_str(code); + result.push_str(&sentence[content_start..]); + return result; + } + } + + sentence.to_string() +} + +/// Split sentences into embeddable tokens and perform Unicode encoding +pub fn split_sentence_and_encode(input: String) -> Result { + let mut result = String::new(); + let lines: Vec<&str> = input.lines().collect(); + + for line in lines { + if line.starts_with('[') && line.contains("]:[") && line.contains("]->[") { + if let Some(start) = line.find("]:[") { + if let Some(end) = line.find("]->[") { + let content = &line[start + 3..end]; + let processed_content = helper_process_sentence_content(content); + + let suffix = &line[end + 1..]; + + let char_end = start; + let char_start = 1; + let character = &line[char_start..char_end]; + let encoded_character = helper_encode_unicode(character); + + // Build the new line with encoded character and processed content + let new_line = + format!("[{}]:{}{}", encoded_character, processed_content, suffix); + result.push_str(&format!("{}\n", new_line)); + continue; + } + } + } + result.push_str(&format!("{}\n", line)); + } + + if result.ends_with('\n') { + result.pop(); + } + + Ok(result) +} + +fn helper_process_sentence_content(content: &str) -> String { + let mut result = String::new(); + let mut chars = content.chars().peekable(); + let mut current_text = String::new(); + let mut in_code = false; + let mut in_bold = false; + let mut in_italic = false; + let mut code_buffer = String::new(); + let mut backticks_count = 0; + + while let Some(ch) = chars.next() { + match ch { + '`' => { + backticks_count += 1; + if backticks_count == 1 { + // Start of code block + if !current_text.is_empty() { + let encoded_text = 
helper_encode_unicode(¤t_text); + result.push_str(&format!("[text:[{}]]", encoded_text)); + current_text.clear(); + } + code_buffer.push(ch); + in_code = true; + } else if backticks_count == 2 && in_code { + // End of code block + code_buffer.push(ch); + let encoded_code = helper_encode_unicode(&code_buffer); + result.push_str(&format!("[code:[{}]]", encoded_code)); + code_buffer.clear(); + backticks_count = 0; + in_code = false; + } else if backticks_count == 1 && !in_code { + // Single backtick in text + current_text.push(ch); + } + } + '*' => { + if in_code { + code_buffer.push(ch); + continue; + } + + // Check for bold + if chars.peek() == Some(&'*') { + chars.next(); // Consume the second '*' + + if in_bold { + // End bold + if !current_text.is_empty() { + let encoded_text = helper_encode_unicode(¤t_text); + result.push_str(&format!("[bold:[{}]]", encoded_text)); + current_text.clear(); + } + in_bold = false; + } else if in_italic { + if !current_text.is_empty() { + let encoded_text = helper_encode_unicode(¤t_text); + result.push_str(&format!("[italic:[{}]]", encoded_text)); + current_text.clear(); + } + in_italic = false; + // Start bold_italic + in_bold = true; + } else { + // Start bold + if !current_text.is_empty() { + let encoded_text = helper_encode_unicode(¤t_text); + result.push_str(&format!("[text:[{}]]", encoded_text)); + current_text.clear(); + } + in_bold = true; + } + } else { + if in_italic { + // End italic + if !current_text.is_empty() { + let encoded_text = helper_encode_unicode(¤t_text); + result.push_str(&format!("[italic:[{}]]", encoded_text)); + current_text.clear(); + } + in_italic = false; + } else if in_bold { + if !current_text.is_empty() { + let encoded_text = helper_encode_unicode(¤t_text); + result.push_str(&format!("[bold:[{}]]", encoded_text)); + current_text.clear(); + } + // Start bold_italic + in_bold = true; + in_italic = true; + } else { + // Start italic + if !current_text.is_empty() { + let encoded_text = 
helper_encode_unicode(¤t_text); + result.push_str(&format!("[text:[{}]]", encoded_text)); + current_text.clear(); + } + in_italic = true; + } + } + } + _ => { + if in_code { + code_buffer.push(ch); + } else { + current_text.push(ch); + } + } + } + } + + // Handle any remaining text + if !code_buffer.is_empty() { + let encoded_code = helper_encode_unicode(&code_buffer); + result.push_str(&format!("[code:[{}]]", encoded_code)); + } + + if !current_text.is_empty() { + let style = match (in_bold, in_italic) { + (true, true) => "bold_italic", + (true, false) => "bold", + (false, true) => "italic", + (false, false) => "text", + }; + let encoded_text = helper_encode_unicode(¤t_text); + result.push_str(&format!("[{}:[{}]]", style, encoded_text)); + } + + result +} + +fn helper_encode_unicode(s: &str) -> String { + let mut result = String::new(); + for ch in s.chars() { + let code = ch as u32; + if code <= 0x7F { + result.push(ch); + } else { + result.push_str(&format!("\\u{:X}", code)); + } + } + result +} diff --git a/converter/src/syntax_checker.rs b/converter/src/syntax_checker.rs new file mode 100644 index 0000000..334fa9d --- /dev/null +++ b/converter/src/syntax_checker.rs @@ -0,0 +1,201 @@ +use crate::error::Exit; + +pub fn check_markdown_syntax(i: &String) -> Result<(), Exit> { + let mut stack = Vec::new(); + let lines: Vec<&str> = i.lines().collect(); + let mut anchors = Vec::new(); + let mut heading_ids = Vec::new(); + + for (line_num, line) in lines.iter().enumerate() { + let line_num = line_num as i64 + 1; + + // Check for headings to collect anchor IDs + if line.starts_with('#') { + let heading_text = line.trim_start_matches('#').trim(); + let id = heading_text + .to_lowercase() + .chars() + .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_') + .collect::(); + if !id.is_empty() { + heading_ids.push(id); + } + } + + let mut chars = line.chars().enumerate().peekable(); + while let Some((pos, ch)) = chars.next() { + let pos = pos as i64 + 1; + + match ch { 
+ '[' => { + // Check if it's a link or image + let is_image = chars.peek().map(|&(_, c)| c) == Some('!'); + if is_image { + chars.next(); // Skip '!' + } + stack.push(('['.to_string(), line_num, pos, is_image)); + } + ']' => { + if let Some((last, _l, b, is_image)) = stack.pop() { + if last != "[" { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: format!( + "Mismatched bracket: expected '[' but found '{}'", + last + ), + line: line_num, + begin: b, + end: pos, + }); + } + // Check if it's followed by '(' for a link + if chars.peek().map(|&(_, c)| c) == Some('(') { + chars.next(); // Skip '(' + // Look for closing ')' + let mut found = false; + let mut anchor_started = false; + let mut anchor = String::new(); + while let Some((_, c)) = chars.next() { + if c == ')' { + found = true; + break; + } + if c == '#' && !anchor_started { + anchor_started = true; + continue; + } + if anchor_started { + anchor.push(c); + } + } + if !found { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: "Link parentheses not closed".to_string(), + line: line_num, + begin: pos, + end: pos, + }); + } + if !anchor.is_empty() { + // Remove whitespace from anchor + let anchor = anchor.replace(|c: char| c.is_whitespace(), ""); + anchors.push((anchor, line_num, pos)); + } + } else if !is_image { + // It's a reference link, collect the anchor + // Check for anchor like [](#anchor) + if chars.peek().map(|&(_, c)| c) == Some('(') { + chars.next(); // Skip '(' + if chars.peek().map(|&(_, c)| c) == Some('#') { + chars.next(); // Skip '#' + let mut anchor = String::new(); + while let Some(&(_, c)) = chars.peek() { + if c == ')' { + break; + } + anchor.push(c); + chars.next(); + } + if !anchor.is_empty() { + // Remove whitespace from anchor + let anchor = + anchor.replace(|c: char| c.is_whitespace(), ""); + anchors.push((anchor, line_num, pos)); + } + } + } + } + } else { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: "Unmatched 
']'".to_string(), + line: line_num, + begin: pos, + end: pos, + }); + } + } + '(' => { + // Check for standalone anchor like (#anchor) + if chars.peek().map(|&(_, c)| c) == Some('#') { + chars.next(); // Skip '#' + let mut anchor = String::new(); + while let Some(&(_, c)) = chars.peek() { + if c == ')' { + break; + } + anchor.push(c); + chars.next(); + } + if !anchor.is_empty() { + // Remove whitespace from anchor + let anchor = anchor.replace(|c: char| c.is_whitespace(), ""); + anchors.push((anchor, line_num, pos)); + } + } else { + stack.push(('('.to_string(), line_num, pos, false)); + } + } + ')' => { + if let Some((last, _l, b, _)) = stack.pop() { + if last != "(" { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: format!( + "Mismatched parenthesis: expected '(' but found '{}'", + last + ), + line: line_num, + begin: b, + end: pos, + }); + } + } else { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: "Unmatched ')'".to_string(), + line: line_num, + begin: pos, + end: pos, + }); + } + } + '`' => { + // Check for backticks + let mut count = 1; + while chars.peek().map(|&(_, c)| c) == Some('`') { + count += 1; + chars.next(); + } + let marker = "`".repeat(count); + + if let Some((last, _, _, _)) = stack.last() { + if last == &marker { + stack.pop(); + } else { + stack.push((marker.clone(), line_num, pos, false)); + } + } else { + stack.push((marker, line_num, pos, false)); + } + } + _ => {} + } + } + } + + // Check for unclosed brackets/parentheses + if let Some((last, line, begin, _)) = stack.pop() { + return Err(Exit::SyntaxError { + content: lines[(line - 1) as usize].to_string(), + reason: format!("Unclosed '{}'", last), + line, + begin, + end: begin, + }); + } + + Ok(()) +} diff --git a/converter/src/utils.rs b/converter/src/utils.rs new file mode 100644 index 0000000..0fbb516 --- /dev/null +++ b/converter/src/utils.rs @@ -0,0 +1 @@ +pub mod path_fmt; diff --git a/converter/src/utils/path_fmt.rs 
b/converter/src/utils/path_fmt.rs new file mode 100644 index 0000000..8750db6 --- /dev/null +++ b/converter/src/utils/path_fmt.rs @@ -0,0 +1,123 @@ +use std::path::{Path, PathBuf}; + +/// Normalize an input path string into a canonical, platform‑agnostic form. +/// +/// This function removes ANSI escape sequences, unifies separators to `/`, +/// collapses duplicate slashes, strips unfriendly characters (`*`, `?`, `"`, `<`, `>`, `|`), +/// resolves simple `..` components, and preserves a trailing slash when present. +/// +/// See examples below for the exact normalization behavior. +/// +/// # Examples +/// +/// ``` +/// # use string_proc::format_path::format_path_str; +/// use std::io::Error; +/// +/// # fn main() -> Result<(), Error> { +/// assert_eq!(format_path_str("C:\\Users\\\\test")?, "C:/Users/test"); +/// assert_eq!( +/// format_path_str("/path/with/*unfriendly?chars")?, +/// "/path/with/unfriendlychars" +/// ); +/// assert_eq!(format_path_str("\x1b[31m/path\x1b[0m")?, "/path"); +/// assert_eq!(format_path_str("/home/user/dir/")?, "/home/user/dir/"); +/// assert_eq!( +/// format_path_str("/home/user/file.txt")?, +/// "/home/user/file.txt" +/// ); +/// assert_eq!( +/// format_path_str("/home/my_user/DOCS/JVCS_TEST/Workspace/../Vault/")?, +/// "/home/my_user/DOCS/JVCS_TEST/Vault/" +/// ); +/// assert_eq!(format_path_str("./home/file.txt")?, "home/file.txt"); +/// assert_eq!(format_path_str("./home/path/")?, "home/path/"); +/// assert_eq!(format_path_str("./")?, ""); +/// # Ok(()) +/// # } +/// ``` +pub fn format_path_str(path: impl Into) -> Result { + let path_str = path.into(); + let ends_with_slash = path_str.ends_with('/'); + + // ANSI Strip + let cleaned = strip_ansi_escapes::strip(&path_str); + let path_without_ansi = String::from_utf8(cleaned) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + + let path_with_forward_slash = path_without_ansi.replace('\\', "/"); + let mut result = String::new(); + let mut prev_char = '\0'; + + 
for c in path_with_forward_slash.chars() { + if c == '/' && prev_char == '/' { + continue; + } + result.push(c); + prev_char = c; + } + + let unfriendly_chars = ['*', '?', '"', '<', '>', '|']; + result = result + .chars() + .filter(|c| !unfriendly_chars.contains(c)) + .collect(); + + // Handle ".." path components + let path_buf = PathBuf::from(&result); + let normalized_path = normalize_path(&path_buf); + result = normalized_path.to_string_lossy().replace('\\', "/"); + + // Restore trailing slash if original path had one + if ends_with_slash && !result.ends_with('/') { + result.push('/'); + } + + // Special case: when result is only "./", return "" + if result == "./" { + return Ok(String::new()); + } + + Ok(result) +} + +/// Normalize path by resolving ".." components without requiring file system access +fn normalize_path(path: &Path) -> PathBuf { + let mut components = Vec::new(); + + for component in path.components() { + match component { + std::path::Component::ParentDir => { + if !components.is_empty() { + components.pop(); + } + } + std::path::Component::CurDir => { + // Skip current directory components + } + _ => { + components.push(component); + } + } + } + + if components.is_empty() { + PathBuf::from(".") + } else { + components.iter().collect() + } +} + +/// Format a [`PathBuf`] into its canonical string form and convert it back. +/// +/// This is a convenience wrapper around [`format_path_str`], preserving +/// the semantics of [`PathBuf`] while applying the same normalization rules: +/// - normalize separators to `/` +/// - remove duplicated separators +/// - strip ANSI escape sequences +/// - remove unfriendly characters (`*`, `?`, etc.) 
+/// - resolve simple `..` segments +pub fn format_path(path: impl Into) -> Result { + let path_str = format_path_str(path.into().display().to_string())?; + Ok(PathBuf::from(path_str)) +} diff --git a/converter/usage.txt b/converter/usage.txt new file mode 100644 index 0000000..e03897e --- /dev/null +++ b/converter/usage.txt @@ -0,0 +1,6 @@ +mdialogc -i -o + -i --input Input file (Markdown / MarkDialog source) + -o, --output Output file (Generated IR / result) + + -h, --help Show this content + -v, --version Show version diff --git a/converter/version.txt b/converter/version.txt new file mode 100644 index 0000000..032f755 --- /dev/null +++ b/converter/version.txt @@ -0,0 +1,2 @@ +mdialogc (0.1.0) +Mark Dialog Converter - Copyright © 2026 Weicao-CatilGrass diff --git a/parser/Cargo.toml b/parser/Cargo.toml deleted file mode 100644 index 2d7cb0c..0000000 --- a/parser/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "markdialog_parser" -workspaces.version = true -edition = "2024" - -[dependencies] -colored = "3.0" -strip-ansi-escapes = "0.2.1" -unicode-width = "0.2" -regex = "1.12" -sha2 = "0.10" diff --git a/parser/src/bin/mdialogp.rs b/parser/src/bin/mdialogp.rs deleted file mode 100644 index 50ed06f..0000000 --- a/parser/src/bin/mdialogp.rs +++ /dev/null @@ -1,95 +0,0 @@ -use markdialog_parser::{ - error::{Exit, handle_exit}, - parse::parse, - special_argument, special_flag, -}; -use std::{path::PathBuf, str::FromStr}; - -fn process() -> Result<(), Exit> { - let mut args: Vec = std::env::args().skip(1).collect(); - - let help = special_flag!(args, "--help") || special_flag!(args, "-h"); - let version = special_flag!(args, "--version") || special_flag!(args, "-v"); - - if version { - let version = include_str!("../../../version.txt"); - println!("{}", version.trim()); - return Err(Exit::Code(0)); - } - - if help || args.len() < 1 { - let usage = include_str!("../../../usage.txt"); - println!("{}", usage.trim()); - return Err(Exit::Code(0)); - } - - let 
input_file = get_input_file(&mut args)?; - let output_ir_file = get_output_ir_file(&mut args)?.unwrap_or_else(|| { - let mut path = input_file.clone(); - if let Some(file_name) = path.file_name() { - let mut new_name = std::ffi::OsString::new(); - new_name.push(file_name); - // Change extension to .dialog - path.set_extension("dialog"); - } else { - path.set_file_name("ir.dialog"); - } - path - }); - - parse(input_file, output_ir_file)?; - - Ok(()) -} - -fn get_input_file(args: &mut Vec) -> Result { - let input = match special_argument!(args, "--input") { - Some(i) => i, - None => match special_argument!(args, "-i") { - Some(i) => i, - None => { - eprintln!("Missing required input argument. Use --input or -i."); - std::process::exit(2); - } - }, - }; - - let input_file = PathBuf::from_str(&input).map_err(|_| { - eprintln!("Invalid file path `{}`!", input); - Exit::Code(2) - })?; - - Ok(input_file) -} - -fn get_output_ir_file(args: &mut Vec) -> Result, Exit> { - let input = match special_argument!(args, "--output") { - Some(i) => Some(i), - None => match special_argument!(args, "-o") { - Some(i) => Some(i), - None => None, - }, - }; - - match input { - Some(i) => { - let input_file = PathBuf::from_str(&i).map_err(|_| { - eprintln!("Invalid file path `{}`!", i); - return Exit::Code(2); - })?; - Ok(Some(input_file)) - } - None => Ok(None), - } -} - -fn main() { - // Init colored - #[cfg(windows)] - colored::control::set_virtual_terminal(true).unwrap(); - - match process() { - Ok(_) => {} - Err(e) => handle_exit(e), - } -} diff --git a/parser/src/error.rs b/parser/src/error.rs deleted file mode 100644 index b594165..0000000 --- a/parser/src/error.rs +++ /dev/null @@ -1,121 +0,0 @@ -use std::{i64, path::PathBuf, process::exit}; - -use colored::Colorize; -use unicode_width::UnicodeWidthStr; - -#[derive(Debug)] -pub enum Exit { - Code(i32), - IoError(std::io::Error), - FileNotFound(PathBuf), - SyntaxError { - content: String, - reason: String, - line: i64, - begin: i64, - 
end: i64, - }, - DuplicateMarker(String), - CycleDependency(PathBuf), -} - -impl From for Exit { - fn from(error: std::io::Error) -> Self { - Exit::IoError(error) - } -} - -pub fn handle_exit(e: Exit) { - match e { - Exit::Code(code) => exit(code), - Exit::IoError(error) => print_parse_error(error.to_string()), - Exit::FileNotFound(path_buf) => { - eprintln!("File `{}` not found!", path_buf.display()); - exit(1) - } - Exit::SyntaxError { - content, - reason, - line, - begin, - end, - } => { - print_syntax_error(content, reason, line, begin, end); - } - Exit::DuplicateMarker(marker) => { - eprintln!("Duplicate marker `{}` found!", marker); - exit(1) - } - Exit::CycleDependency(dialog) => { - eprintln!("Dialog `{}` depends on itself!", dialog.display()); - exit(1) - } - } -} - -fn print_parse_error(content: impl AsRef) { - eprintln!("Parse Error !"); - eprintln!("{}", content.as_ref().trim()); - exit(1); -} - -macro_rules! line { - ($line:expr, $N:expr) => { - if $line + $N <= 0 { - " ".to_string() - } else { - ($line + $N).to_string() - } - }; -} - -pub fn print_syntax_error(content: String, reason: String, line: i64, begin: i64, end: i64) { - let content_len = content.width() as i64; - let end = end.clamp(begin, content_len); - - eprintln!("{}", "Parse Failed: Syntax Error".bright_yellow()); - eprintln!("{}{}", line!(line, -1), "|"); - - let before: String = content.chars().take(begin.max(0) as usize).collect(); - let highlight_len = (end - begin).max(1) as usize; - let highlight: String = content - .chars() - .skip(begin.max(0) as usize) - .take(highlight_len) - .collect(); - let after: String = content - .chars() - .skip((begin.max(0) + highlight_len as i64) as usize) - .collect(); - - eprintln!( - "{}{} {}{}{}", - line.to_string().cyan(), - "|".cyan(), - before.cyan(), - highlight.bright_cyan(), - after.cyan() - ); - - let prefix_chars: String = content.chars().take(begin.max(0) as usize).collect(); - let prefix_width = prefix_chars.width() as usize; - - 
eprintln!( - "{}{} {}", - line!(line, 1), - "|", - format!( - "{}{}____ {}", - " ".repeat(prefix_width), - "^".repeat(((end - begin).max(1)) as usize), - reason - ) - .bright_cyan() - ); - eprintln!("{}{}", line!(line, 2), "|"); - eprintln!( - "{}", - "Please fix the issue and run the program again".bright_yellow() - ); - exit(1); -} diff --git a/parser/src/lib.rs b/parser/src/lib.rs deleted file mode 100644 index d7caac3..0000000 --- a/parser/src/lib.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod error; -pub mod macros; -pub mod parse; -pub mod syntax_checker; -pub mod utils; diff --git a/parser/src/macros.rs b/parser/src/macros.rs deleted file mode 100644 index 894b3f4..0000000 --- a/parser/src/macros.rs +++ /dev/null @@ -1,33 +0,0 @@ -#[macro_export] -macro_rules! special_flag { - ($args:expr, $flag:expr) => {{ - let flag = $flag; - let found = $args.iter().any(|arg| arg == flag); - $args.retain(|arg| arg != flag); - found - }}; -} - -#[macro_export] -macro_rules! special_argument { - ($args:expr, $flag:expr) => {{ - let flag = $flag; - let mut value: Option = None; - let mut i = 0; - while i < $args.len() { - if $args[i] == flag { - if i + 1 < $args.len() { - value = Some($args[i + 1].clone()); - $args.remove(i + 1); - $args.remove(i); - } else { - value = None; - $args.remove(i); - } - break; - } - i += 1; - } - value - }}; -} diff --git a/parser/src/parse.rs b/parser/src/parse.rs deleted file mode 100644 index c4c69f8..0000000 --- a/parser/src/parse.rs +++ /dev/null @@ -1,1279 +0,0 @@ -use std::path::{Path, PathBuf}; - -use regex::Regex; -use sha2::{Digest, Sha256}; - -use crate::{error::Exit, syntax_checker::check_markdown_syntax, utils::path_fmt::format_path}; - -pub fn parse(input: PathBuf, ir_output: PathBuf) -> Result<(), Exit> { - let result = std::fs::read_to_string(&input)?; - - check_markdown_syntax(&result)?; - - let result = unwrap_includes(result, input)?; - - check_duplicate_marker(&result)?; - - let result = clean_markdown(result)?; - let result = 
fix_mark_jump(result)?; - let result = replace_marker_name(result)?; - let result = convert_to_step_sentence_structure(result)?; - let result = strip_invalid_jump(result)?; - let result = convert_image_to_code(result)?; - let result = apply_code_lines(result)?; - let result = split_sentence_and_encode(result)?; - - std::fs::write(&ir_output, result)?; - Ok(()) -} - -/// Expand text includes of [[Dialog.md]] -pub fn unwrap_includes(input: String, self_path: PathBuf) -> Result { - let mut stack = Vec::::new(); - expand_recursive(input, &self_path, &mut stack) -} - -fn expand_recursive( - content: String, - current_path: &Path, - stack: &mut Vec, -) -> Result { - let mut output = String::new(); - let mut in_code_block = false; - - let current_norm = format_path(current_path)?; - - if stack.contains(¤t_norm) { - return Err(Exit::CycleDependency(current_norm)); - } - - stack.push(current_norm.clone()); - - for line in content.lines() { - if line.trim().starts_with("```") { - in_code_block = !in_code_block; - output.push_str(line); - output.push('\n'); - continue; - } - - if in_code_block { - output.push_str(line); - output.push('\n'); - continue; - } - - if let Some(include_path) = extract_include(line) { - let include_abs = format_path(¤t_path.parent().unwrap().join(include_path))?; - let include_content = std::fs::read_to_string(&include_abs).map_err(|e| { - if e.kind() == std::io::ErrorKind::NotFound { - Exit::FileNotFound(include_abs.clone()) - } else { - Exit::IoError(e) - } - })?; - - let expanded = expand_recursive(include_content, &include_abs, stack)?; - output.push_str(&expanded); - } else { - output.push_str(line); - output.push('\n'); - } - } - - stack.pop(); - - Ok(output) -} - -fn extract_include(line: &str) -> Option<&str> { - line.trim() - .strip_prefix("[[") - .and_then(|s| s.strip_suffix("]]")) -} - -/// Check for duplicate markers -pub fn check_duplicate_marker(input: &String) -> Result<(), Exit> { - let mut seen = std::collections::HashSet::new(); - 
let heading_re = Regex::new(r"^(#{1,5})\s+(.+)$").unwrap(); - - for line in input.lines() { - if let Some(caps) = heading_re.captures(line) { - let heading_text = caps[2].trim().to_string(); - if seen.contains(&heading_text) { - return Err(Exit::DuplicateMarker(heading_text)); - } - seen.insert(heading_text); - } - } - - Ok(()) -} - -/// Clean Markdown -/// 1. Remove blockquotes -/// 2. Remove empty lines -/// 3. Trim each line -pub fn clean_markdown(i: String) -> Result { - let lines = i.lines(); - let mut cleaned = Vec::new(); - - for line in lines { - if line.starts_with('>') { - continue; - } - let trimmed = line.trim(); - if trimmed.is_empty() { - continue; - } - cleaned.push(trimmed.to_string()); - } - - Ok(cleaned.join("\n")) -} - -#[cfg(test)] -mod test_clean_markdown { - use super::*; - - #[test] - fn test_clean_markdown_removes_blockquotes() { - let input = "> This is a blockquote\nNormal text\n> Another blockquote".to_string(); - let expected = "Normal text".to_string(); - let Ok(result) = clean_markdown(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_clean_markdown_removes_empty_lines() { - let input = "Line 1\n\n\nLine 2\n\n".to_string(); - let expected = "Line 1\nLine 2".to_string(); - let Ok(result) = clean_markdown(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_clean_markdown_trims_lines() { - let input = " Line 1 \n\tLine 2\t\n".to_string(); - let expected = "Line 1\nLine 2".to_string(); - let Ok(result) = clean_markdown(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_clean_markdown_combined() { - let input = "> Blockquote\n\n Line 1 \n> Another\n\nLine 2\n\n".to_string(); - let expected = "Line 1\nLine 2".to_string(); - let Ok(result) = clean_markdown(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_clean_markdown_empty_input() { - 
let input = "".to_string(); - let expected = "".to_string(); - let Ok(result) = clean_markdown(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_clean_markdown_only_blockquotes() { - let input = "> Quote 1\n> Quote 2".to_string(); - let expected = "".to_string(); - let Ok(result) = clean_markdown(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_clean_markdown_only_whitespace() { - let input = " \n\t\n ".to_string(); - let expected = "".to_string(); - let Ok(result) = clean_markdown(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } -} - -/// Fix jump syntax in each line -/// 1. Correct the following syntax -/// ```ignore -/// - It's [Item](#Mark) -/// > corrected to -/// - It's Item [](#Mark) -/// ``` -/// -/// 2. If there are multiple options, take the first one -/// ```ignore -/// - There might be two options: [A](#A) and [B](#B)! -/// > corrected to -/// - There might be two options: A and B! 
[](#A) -/// ``` -pub fn fix_mark_jump(i: String) -> Result { - let mut result = String::new(); - - for line in i.lines() { - let (processed_content, first_link_dest) = helper_process_line_content(line); - let processed_line = helper_format_line_with_link(processed_content, first_link_dest); - let final_line = helper_convert_ordered_list_marker(processed_line); - - result.push_str(&final_line); - result.push('\n'); - } - - if result.ends_with('\n') { - result.pop(); - } - - Ok(result) -} - -/// Process line content, extract link text and return the first link target -/// -/// # Examples -/// -/// ``` -/// use markdialog_parser::parse::helper_process_line_content; -/// -/// // Single link -/// let (content, dest) = helper_process_line_content("This is a [Link](#target) Example"); -/// assert_eq!(content, "This is a Link Example"); -/// assert_eq!(dest, Some("target".to_string())); -/// -/// // Extract the first link -/// let (content, dest) = helper_process_line_content("First [link1](#target1) and second [link2](#target2)"); -/// assert_eq!(content, "First link1 and second link2"); -/// assert_eq!(dest, Some("target1".to_string())); -/// -/// // No link -/// let (content, dest) = helper_process_line_content("Text without link"); -/// assert_eq!(content, "Text without link"); -/// assert_eq!(dest, None); -/// -/// // Invalid link -/// let (content, dest) = helper_process_line_content("Invalid [link format"); -/// assert_eq!(content, "Invalid ["); -/// assert_eq!(dest, None); -/// -/// // Empty -/// let (content, dest) = helper_process_line_content(""); -/// assert_eq!(content, ""); -/// assert_eq!(dest, None); -/// -/// // Link target contains spaces and extra # symbols -/// let (content, dest) = helper_process_line_content("Link[text](# target#)"); -/// assert_eq!(content, "Linktext"); -/// assert_eq!(dest, Some("target".to_string())); -/// ``` -pub fn helper_process_line_content(line: &str) -> (String, Option) { - // Check if line is an image line (starts with 
"![") - if line.starts_with("![") { - // Return the original line unchanged with no link destination - return (line.to_string(), None); - } - - let mut processed = String::new(); - let mut chars = line.chars().peekable(); - let mut first_link_dest = None; - let mut has_link = false; - - while let Some(ch) = chars.next() { - if ch == '[' { - if let Some((link_text, link_dest, remaining_chars)) = helper_parse_link(&mut chars) { - processed.push_str(&link_text); - if !has_link { - first_link_dest = Some(link_dest); - has_link = true; - } - chars = remaining_chars; - continue; - } else { - // Invalid - processed.push(ch); - } - } else { - processed.push(ch); - } - } - - (processed, first_link_dest) -} - -/// Parse possible Markdown links, return (link text, link target, remaining character iterator) -/// -/// # Examples -/// -/// ``` -/// use markdialog_parser::parse::helper_parse_link; -/// -/// // Standard Link -/// let mut chars = "[Link](#target)".chars().peekable(); -/// chars.next(); // Skip '[' -/// let result = helper_parse_link(&mut chars); -/// assert!(result.is_some()); -/// let (text, dest, _) = result.unwrap(); -/// assert_eq!(text, "Link"); -/// assert_eq!(dest, "target"); -/// -/// // Link text contains spaces -/// let mut chars = "[Link text](#target)".chars().peekable(); -/// chars.next(); -/// let result = helper_parse_link(&mut chars); -/// assert!(result.is_some()); -/// let (text, dest, _) = result.unwrap(); -/// assert_eq!(text, "Link text"); -/// assert_eq!(dest, "target"); -/// -/// // Link target contains spaces and extra # symbols -/// let mut chars = "[text](# target#)".chars().peekable(); -/// chars.next(); -/// let result = helper_parse_link(&mut chars); -/// assert!(result.is_some()); -/// let (text, dest, _) = result.unwrap(); -/// assert_eq!(text, "text"); -/// assert_eq!(dest, "target"); -/// -/// // Invalid format: missing ']' -/// let mut chars = "[Link(#target)".chars().peekable(); -/// chars.next(); -/// let result = 
helper_parse_link(&mut chars); -/// assert!(result.is_none()); -/// -/// // Invalid format: missing '(#' -/// let mut chars = "[Link]target)".chars().peekable(); -/// chars.next(); -/// let result = helper_parse_link(&mut chars); -/// assert!(result.is_none()); -/// -/// // Invalid format: missing ')' -/// let mut chars = "[Link](#target".chars().peekable(); -/// chars.next(); -/// let result = helper_parse_link(&mut chars); -/// assert!(result.is_some()); -/// let (text, dest, _) = result.unwrap(); -/// assert_eq!(text, "Link"); -/// assert_eq!(dest, "target"); -/// ``` -pub fn helper_parse_link<'a>( - chars: &mut std::iter::Peekable>, -) -> Option<(String, String, std::iter::Peekable>)> { - let mut link_text = String::new(); - - while let Some(&ch) = chars.peek() { - chars.next(); - if ch == ']' { - break; - } - link_text.push(ch); - } - - if chars.next() != Some('(') || chars.next() != Some('#') { - return None; - } - - let mut link_dest = String::new(); - while let Some(ch) = chars.next() { - if ch == ')' { - break; - } - link_dest.push(ch); - } - - let cleaned_dest = link_dest.trim().replace(' ', "").replace('#', ""); - - Some((link_text, cleaned_dest, chars.clone())) -} - -/// If there is a link dest, add a jump marker at the end of the line -/// -/// # Examples -/// -/// ``` -/// use markdialog_parser::parse::helper_format_line_with_link; -/// -/// // With a link dest -/// let content = "Some content".to_string(); -/// let link_dest = Some("target".to_string()); -/// let result = helper_format_line_with_link(content, link_dest); -/// assert_eq!(result, "Some content [](#target)"); -/// -/// // With empty content and a link dest -/// let content = "".to_string(); -/// let link_dest = Some("target".to_string()); -/// let result = helper_format_line_with_link(content, link_dest); -/// assert_eq!(result, "[](#target)"); -/// -/// // With trailing spaces in content -/// let content = "Content with spaces ".to_string(); -/// let link_dest = 
Some("target".to_string()); -/// let result = helper_format_line_with_link(content, link_dest); -/// assert_eq!(result, "Content with spaces [](#target)"); -/// -/// // Without a link dest -/// let content = "Some content".to_string(); -/// let link_dest = None; -/// let result = helper_format_line_with_link(content, link_dest); -/// assert_eq!(result, "Some content"); -/// -/// // With an empty link dest -/// let content = "Some content".to_string(); -/// let link_dest = Some("".to_string()); -/// let result = helper_format_line_with_link(content, link_dest); -/// assert_eq!(result, "Some content"); -/// -/// // With whitespace-only link dest -/// let content = "Some content".to_string(); -/// let link_dest = Some(" ".to_string()); -/// let result = helper_format_line_with_link(content, link_dest); -/// assert_eq!(result, "Some content"); -/// ``` -pub fn helper_format_line_with_link(content: String, link_dest: Option) -> String { - match link_dest { - Some(dest) if !dest.trim().is_empty() => { - format!("{} [](#{})", content.trim_end(), dest.trim()) - .trim() - .to_string() - } - _ => content, - } -} - -/// Convert ordered list markers to unordered list markers -/// -/// # Examples -/// -/// ``` -/// use markdialog_parser::parse::helper_convert_ordered_list_marker; -/// -/// // Basic conversion -/// let input = "1. First item".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, "- First item"); -/// -/// // Multi-digit numbers -/// let input = "10. Tenth item".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, "- Tenth item"); -/// -/// // With leading spaces -/// let input = " 2. 
Second item".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, "- Second item"); -/// -/// // Not an ordered list marker (no dot and space) -/// let input = "1.Not a list".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, "1.Not a list"); -/// -/// // Not an ordered list marker (different spacing) -/// let input = "1. Extra space".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, "- Extra space"); -/// -/// // Already unordered list -/// let input = "- Already unordered".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, "- Already unordered"); -/// -/// // Regular text -/// let input = "This is not a list".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, "This is not a list"); -/// -/// // Empty string -/// let input = "".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, ""); -/// -/// // Only whitespace -/// let input = " ".to_string(); -/// let result = helper_convert_ordered_list_marker(input); -/// assert_eq!(result, " "); -/// ``` -pub fn helper_convert_ordered_list_marker(line: String) -> String { - let trimmed = line.trim_start(); - - if let Some(_rest) = trimmed.strip_prefix(|c: char| c.is_ascii_digit()) { - let mut chars = trimmed.chars(); - let mut digit_count = 0; - - while let Some(c) = chars.next() { - if c.is_ascii_digit() { - digit_count += 1; - } else { - break; - } - } - - if digit_count > 0 { - let rest_after_digits = &trimmed[digit_count..]; - if let Some(content) = rest_after_digits.strip_prefix(". 
") { - return format!("- {}", content); - } - } - } - - line -} - -#[cfg(test)] -mod test_fix_mark_jump { - use super::*; - - #[test] - fn test_fix_mark_jump_single_link() { - let input = "- It's [Item](#Mark)".to_string(); - let expected = "- It's Item [](#Mark)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_multiple_links_takes_first() { - let input = "- There might be two options: [A](#A) and [B](#B)!".to_string(); - let expected = "- There might be two options: A and B! [](#A)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_no_link() { - let input = "- Just a normal line".to_string(); - let expected = "- Just a normal line".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_empty_line() { - let input = "".to_string(); - let expected = "".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_multiple_lines() { - let input = "- First [Item](#First)\n- Second [Item](#Second)".to_string(); - let expected = "- First Item [](#First)\n- Second Item [](#Second)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_link_at_end() { - let input = "- End with [link](#target)".to_string(); - let expected = "- End with link [](#target)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_link_at_beginning() { - let input = "- [Start](#target) with link".to_string(); - let expected = "- Start with link 
[](#target)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_link_in_middle() { - let input = "- Text [middle](#target) text".to_string(); - let expected = "- Text middle text [](#target)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_ordered_list_conversion() { - let input = "1. [Item](#target)".to_string(); - let expected = "- Item [](#target)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_ordered_list_multiple_digits() { - let input = "10. [Tenth](#target) item".to_string(); - let expected = "- Tenth item [](#target)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_mixed_ordered_and_unordered() { - let input = "1. [First](#first)\n- [Second](#second)\n2. 
[Third](#third)".to_string(); - let expected = "- First [](#first)\n- Second [](#second)\n- Third [](#third)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_invalid_link_format() { - let input = "- Invalid [link format".to_string(); - let expected = "- Invalid [".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_link_with_spaces_in_target() { - let input = "- Link [text](# target#)".to_string(); - let expected = "- Link text [](#target)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_empty_link_text() { - let input = "- [](#target)".to_string(); - let expected = "- [](#target)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_only_whitespace() { - let input = " ".to_string(); - let expected = " ".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } - - #[test] - fn test_fix_mark_jump_complex_multiple_links() { - let input = "- Choose [A](#A), [B](#B), or [C](#C)!".to_string(); - let expected = "- Choose A, B, or C! 
[](#A)".to_string(); - let Ok(result) = fix_mark_jump(input) else { - panic!("Parse error!"); - }; - assert_eq!(result, expected); - } -} - -/// Replace marker names: replace heading text and link anchors with corresponding SHA256 -/// -/// Example: -/// ```ignore -/// # Original text -/// # Chapter Title -/// - Jump to [Chapter Title](#Chapter Title) -/// -/// # After processing -/// # a1b2c3d4 -/// - Jump to [](#a1b2c3d4) -/// ``` -pub fn replace_marker_name(i: String) -> Result { - let mut result = i; - - let heading_re = Regex::new(r"^(#{1,5})\s+(.+)$").unwrap(); - let mut heading_map = std::collections::HashMap::new(); - - for line in result.lines() { - if let Some(caps) = heading_re.captures(line) { - let heading_text = caps[2].trim().to_string(); - let hash = format!("{:x}", Sha256::digest(heading_text.as_bytes())); - let short_hash = &hash[..8]; - heading_map.insert(heading_text, short_hash.to_string()); - } - } - - let mut lines: Vec = Vec::new(); - for line in result.lines() { - if let Some(caps) = heading_re.captures(line) { - let level = &caps[1]; - let heading_text = caps[2].trim(); - - if let Some(hash) = heading_map.get(heading_text) { - lines.push(format!("{} {}", level, hash)); - } else { - lines.push(line.to_string()); - } - } else { - lines.push(line.to_string()); - } - } - result = lines.join("\n"); - - let link_re = Regex::new(r"\[\]\(#([^)]+)\)").unwrap(); - result = link_re - .replace_all(&result, |caps: ®ex::Captures| { - let anchor_name = &caps[1]; - if let Some(hash) = heading_map.get(anchor_name) { - format!("[](#{})", hash) - } else { - let hash = format!("{:x}", Sha256::digest(anchor_name.as_bytes())); - let short_hash = &hash[..8]; - format!("[](#{})", short_hash) - } - }) - .to_string(); - - Ok(result) -} - -/// Split content into Step + Sentence structure -pub fn convert_to_step_sentence_structure(input: String) -> Result { - let mut result = String::new(); - let mut current_marker = String::new(); - let mut current_step_id = 0; - 
let mut current_character = String::new(); - let mut has_no_switch_flag = false; - - let mut code_record_mode = false; - let mut option_record_mode = false; - - let mut sentences_buffer = String::new(); - for line in input.split("\n") { - // Record code - if code_record_mode { - // If code block marker is found again, end code recording - if line.starts_with("```") && code_record_mode { - sentences_buffer.push_str("`\n"); - code_record_mode = false; - continue; - } - sentences_buffer.push_str(format!("{}\\n", line).as_str()); - continue; - } - - // Record options - if option_record_mode { - // Still an option, continue appending - if line.starts_with("- ") { - let (sentence, next) = helper_get_jump_from_line(line); - let next = if let Some(next) = next { - format!("->[#{}_0]", next) - } else { - next_flag(current_marker.as_str(), current_step_id) - }; - let option_line = format!( - "{}[{}]{}", - character(¤t_character, has_no_switch_flag), - sentence, - next - ); - sentences_buffer.push_str(option_line.as_str()); - sentences_buffer.push('\n'); - continue; - } else { - // When ending option recording, create and advance one Step - result.push_str(step_line(current_marker.as_str(), current_step_id).as_str()); - result.push('\n'); - result.push_str(sentences_buffer.as_str()); - sentences_buffer.clear(); - current_step_id += 1; - // Clean "Has no switch flag" - has_no_switch_flag = false; - // Close option mode - option_record_mode = false; - // Do not continue here, proceed to process subsequent content - } - } - - // Refresh heading - if helper_is_marker(line) { - current_marker = helper_read_maker(line).to_string(); - current_step_id = 0; - continue; - } - - // Refresh character - if helper_is_character(line) { - let (character, no_switch_flag) = helper_read_character(line); - current_character = character.to_string(); - has_no_switch_flag = no_switch_flag; - continue; - } - - // Image recording - if line.starts_with('!') { - sentences_buffer.push_str(line); - 
sentences_buffer.push('\n'); - continue; - } - - // Start code recording - if line.starts_with("```") && !code_record_mode { - sentences_buffer.push('`'); - code_record_mode = true; - continue; - } - - // Option recording - if line.starts_with("- ") { - let (sentence, next) = helper_get_jump_from_line(line); - let next = if let Some(next) = next { - format!("->[#{}_0]", next) - } else { - next_flag(current_marker.as_str(), current_step_id) - }; - let option_line = format!( - "{}[{}]{}", - character(¤t_character, has_no_switch_flag), - sentence, - next - ); - sentences_buffer.push_str(option_line.as_str()); - sentences_buffer.push('\n'); - - // Start option recording mode - if !option_record_mode { - option_record_mode = true; - } - continue; - } - - // Normal sentence - let (sentence, next) = helper_get_jump_from_line(line); - let next = if let Some(next) = next { - format!("->[#{}_0]", next) - } else { - next_flag(current_marker.as_str(), current_step_id) - }; - let sentence_line = format!( - "{}[{}]{}", - character(¤t_character, has_no_switch_flag), - sentence, - next - ); - has_no_switch_flag = false; - - // Create and advance one Step - result.push_str(step_line(current_marker.as_str(), current_step_id).as_str()); - result.push('\n'); - result.push_str(sentences_buffer.as_str()); - sentences_buffer.clear(); - result.push_str(sentence_line.as_str()); - result.push('\n'); - current_step_id += 1; - } - - Ok(result) -} - -pub fn character(character: &str, has_no_switch_flag: bool) -> String { - let flag = if has_no_switch_flag { "*" } else { "" }; - format!("[{}{}{}]:", &flag, character, &flag) -} - -pub fn step_name(marker: &str, current_id: i64) -> String { - format!("{}_{}", marker, current_id) -} - -pub fn step_line(marker: &str, current_id: i64) -> String { - format!("@@@@@@@@@@ {}_{}", marker, current_id) -} - -pub fn next_flag(marker: &str, current_id: i64) -> String { - format!("->[#{}_{}]", marker, current_id + 1) -} - -pub fn helper_is_marker(line: &str) 
-> bool { - line.starts_with("# ") - || line.starts_with("## ") - || line.starts_with("### ") - || line.starts_with("#### ") - || line.starts_with("##### ") -} - -pub fn helper_read_maker(line: &str) -> &str { - let trimmed = line.trim_start(); - if trimmed.starts_with('#') { - if trimmed.starts_with("# ") - || trimmed.starts_with("## ") - || trimmed.starts_with("### ") - || trimmed.starts_with("#### ") - || trimmed.starts_with("##### ") - { - let parts: Vec<&str> = trimmed.splitn(2, ' ').collect(); - if parts.len() == 2 { - return parts[1].trim(); - } - } - } - "" -} - -pub fn helper_is_character(line: &str) -> bool { - line.starts_with("######") -} - -pub fn helper_read_character(line: &str) -> (&str, bool) { - let trimmed = line.trim_start(); - if trimmed.starts_with("######") { - let parts: Vec<&str> = trimmed.splitn(2, ' ').collect(); - if parts.len() == 2 { - let character = parts[1].trim(); - if character.starts_with('*') && character.ends_with('*') { - let trimmed = character.trim_matches('*'); - return (trimmed.trim(), true); - } else { - return (character.trim(), false); - } - } - } - ("", false) -} - -pub fn helper_get_jump_from_line(line: &str) -> (String, Option) { - let pattern = r"\[\]\(#([^)]+)\)$"; - let re = Regex::new(pattern).unwrap(); - - if let Some(caps) = re.captures(line.trim_end()) { - let target = caps.get(1).unwrap().as_str(); - let line_without_jump = line - .trim_end() - .replace(&format!(" [](#{})", target), "") - .to_string(); - return ( - line_without_jump.trim_start_matches("- ").to_string(), - Some(format!("{}", target)), - ); - } - - (line.trim_start_matches("- ").to_string(), None) -} - -/// Strip all jumps that have not appeared -pub fn strip_invalid_jump(input: String) -> Result { - let lines: Vec<&str> = input.lines().collect(); - let mut valid_ids = std::collections::HashSet::new(); - - for line in &lines { - if line.starts_with("@@@@@@@@@@ ") { - let id = line.trim_start_matches("@@@@@@@@@@ ").trim(); - 
valid_ids.insert(id.to_string()); - } - } - - let mut result_lines = Vec::new(); - let link_re = Regex::new(r"\[#([^)]+)\]").unwrap(); - - for line in lines { - let processed_line = link_re.replace_all(line, |caps: ®ex::Captures| { - let id = &caps[1]; - if valid_ids.contains(id) { - format!("[#{}]", id) - } else { - "[]".to_string() - } - }); - result_lines.push(processed_line.to_string()); - } - - Ok(result_lines.join("\n")) -} - -/// Convert image lines to code lines -pub fn convert_image_to_code(input: String) -> Result { - let mut result = String::new(); - let lines: Vec<&str> = input.lines().collect(); - let image_re = Regex::new(r"^!\[[^\]]*\]\(([^)]+)\)$").unwrap(); - - for line in lines { - if let Some(caps) = image_re.captures(line) { - let image_path = caps.get(1).unwrap().as_str(); - result.push_str(&format!("`image \"{}\"`\n", image_path)); - } else { - result.push_str(line); - result.push('\n'); - } - } - - // Remove trailing newline if present - if result.ends_with('\n') { - result.pop(); - } - - Ok(result) -} - -/// Apply code lines to sentences -pub fn apply_code_lines(input: String) -> Result { - let mut out = String::new(); - let lines: Vec<&str> = input.lines().collect(); - - let mut i = 0; - while i < lines.len() { - let line = lines[i]; - - if !line.trim_start().starts_with('`') { - out.push_str(line); - out.push('\n'); - i += 1; - continue; - } - - let mut code_buf = String::new(); - while i < lines.len() && { - let line: &str = lines[i]; - line.trim_start().starts_with('`') - } { - code_buf.push_str(lines[i].trim()); - i += 1; - } - - if i >= lines.len() - || !{ - let line: &str = lines[i]; - line.trim_start().starts_with('[') - } - { - continue; - } - - if i + 1 < lines.len() && { - let line: &str = lines[i + 1]; - line.trim_start().starts_with('[') - } { - continue; - } - - let merged = helper_merge_code_into_sentence(&code_buf, lines[i]); - out.push_str(&merged); - out.push('\n'); - i += 1; - } - - Ok(out) -} - -fn 
helper_merge_code_into_sentence(code: &str, sentence: &str) -> String { - if let Some(start) = sentence.find(":[") { - if let Some(_) = sentence[start + 2..].find(']') { - let content_start = start + 2; - - let mut result = String::new(); - result.push_str(&sentence[..content_start]); - result.push_str(code); - result.push_str(&sentence[content_start..]); - return result; - } - } - - sentence.to_string() -} - -/// Split sentences into embeddable tokens and perform Unicode encoding -pub fn split_sentence_and_encode(input: String) -> Result { - let mut result = String::new(); - let lines: Vec<&str> = input.lines().collect(); - - for line in lines { - if line.starts_with('[') && line.contains("]:[") && line.contains("]->[") { - if let Some(start) = line.find("]:[") { - if let Some(end) = line.find("]->[") { - let content = &line[start + 3..end]; - let processed_content = helper_process_sentence_content(content); - - let suffix = &line[end + 1..]; - - let char_end = start; - let char_start = 1; - let character = &line[char_start..char_end]; - let encoded_character = helper_encode_unicode(character); - - // Build the new line with encoded character and processed content - let new_line = - format!("[{}]:{}{}", encoded_character, processed_content, suffix); - result.push_str(&format!("{}\n", new_line)); - continue; - } - } - } - result.push_str(&format!("{}\n", line)); - } - - if result.ends_with('\n') { - result.pop(); - } - - Ok(result) -} - -fn helper_process_sentence_content(content: &str) -> String { - let mut result = String::new(); - let mut chars = content.chars().peekable(); - let mut current_text = String::new(); - let mut in_code = false; - let mut in_bold = false; - let mut in_italic = false; - let mut code_buffer = String::new(); - let mut backticks_count = 0; - - while let Some(ch) = chars.next() { - match ch { - '`' => { - backticks_count += 1; - if backticks_count == 1 { - // Start of code block - if !current_text.is_empty() { - let encoded_text = 
helper_encode_unicode(¤t_text); - result.push_str(&format!("[text:[{}]]", encoded_text)); - current_text.clear(); - } - code_buffer.push(ch); - in_code = true; - } else if backticks_count == 2 && in_code { - // End of code block - code_buffer.push(ch); - let encoded_code = helper_encode_unicode(&code_buffer); - result.push_str(&format!("[code:[{}]]", encoded_code)); - code_buffer.clear(); - backticks_count = 0; - in_code = false; - } else if backticks_count == 1 && !in_code { - // Single backtick in text - current_text.push(ch); - } - } - '*' => { - if in_code { - code_buffer.push(ch); - continue; - } - - // Check for bold - if chars.peek() == Some(&'*') { - chars.next(); // Consume the second '*' - - if in_bold { - // End bold - if !current_text.is_empty() { - let encoded_text = helper_encode_unicode(¤t_text); - result.push_str(&format!("[bold:[{}]]", encoded_text)); - current_text.clear(); - } - in_bold = false; - } else if in_italic { - if !current_text.is_empty() { - let encoded_text = helper_encode_unicode(¤t_text); - result.push_str(&format!("[italic:[{}]]", encoded_text)); - current_text.clear(); - } - in_italic = false; - // Start bold_italic - in_bold = true; - } else { - // Start bold - if !current_text.is_empty() { - let encoded_text = helper_encode_unicode(¤t_text); - result.push_str(&format!("[text:[{}]]", encoded_text)); - current_text.clear(); - } - in_bold = true; - } - } else { - if in_italic { - // End italic - if !current_text.is_empty() { - let encoded_text = helper_encode_unicode(¤t_text); - result.push_str(&format!("[italic:[{}]]", encoded_text)); - current_text.clear(); - } - in_italic = false; - } else if in_bold { - if !current_text.is_empty() { - let encoded_text = helper_encode_unicode(¤t_text); - result.push_str(&format!("[bold:[{}]]", encoded_text)); - current_text.clear(); - } - // Start bold_italic - in_bold = true; - in_italic = true; - } else { - // Start italic - if !current_text.is_empty() { - let encoded_text = 
helper_encode_unicode(¤t_text); - result.push_str(&format!("[text:[{}]]", encoded_text)); - current_text.clear(); - } - in_italic = true; - } - } - } - _ => { - if in_code { - code_buffer.push(ch); - } else { - current_text.push(ch); - } - } - } - } - - // Handle any remaining text - if !code_buffer.is_empty() { - let encoded_code = helper_encode_unicode(&code_buffer); - result.push_str(&format!("[code:[{}]]", encoded_code)); - } - - if !current_text.is_empty() { - let style = match (in_bold, in_italic) { - (true, true) => "bold_italic", - (true, false) => "bold", - (false, true) => "italic", - (false, false) => "text", - }; - let encoded_text = helper_encode_unicode(¤t_text); - result.push_str(&format!("[{}:[{}]]", style, encoded_text)); - } - - result -} - -fn helper_encode_unicode(s: &str) -> String { - let mut result = String::new(); - for ch in s.chars() { - let code = ch as u32; - if code <= 0x7F { - result.push(ch); - } else { - result.push_str(&format!("\\u{:X}", code)); - } - } - result -} diff --git a/parser/src/syntax_checker.rs b/parser/src/syntax_checker.rs deleted file mode 100644 index 334fa9d..0000000 --- a/parser/src/syntax_checker.rs +++ /dev/null @@ -1,201 +0,0 @@ -use crate::error::Exit; - -pub fn check_markdown_syntax(i: &String) -> Result<(), Exit> { - let mut stack = Vec::new(); - let lines: Vec<&str> = i.lines().collect(); - let mut anchors = Vec::new(); - let mut heading_ids = Vec::new(); - - for (line_num, line) in lines.iter().enumerate() { - let line_num = line_num as i64 + 1; - - // Check for headings to collect anchor IDs - if line.starts_with('#') { - let heading_text = line.trim_start_matches('#').trim(); - let id = heading_text - .to_lowercase() - .chars() - .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_') - .collect::(); - if !id.is_empty() { - heading_ids.push(id); - } - } - - let mut chars = line.chars().enumerate().peekable(); - while let Some((pos, ch)) = chars.next() { - let pos = pos as i64 + 1; - - match ch { - '[' 
=> { - // Check if it's a link or image - let is_image = chars.peek().map(|&(_, c)| c) == Some('!'); - if is_image { - chars.next(); // Skip '!' - } - stack.push(('['.to_string(), line_num, pos, is_image)); - } - ']' => { - if let Some((last, _l, b, is_image)) = stack.pop() { - if last != "[" { - return Err(Exit::SyntaxError { - content: line.to_string(), - reason: format!( - "Mismatched bracket: expected '[' but found '{}'", - last - ), - line: line_num, - begin: b, - end: pos, - }); - } - // Check if it's followed by '(' for a link - if chars.peek().map(|&(_, c)| c) == Some('(') { - chars.next(); // Skip '(' - // Look for closing ')' - let mut found = false; - let mut anchor_started = false; - let mut anchor = String::new(); - while let Some((_, c)) = chars.next() { - if c == ')' { - found = true; - break; - } - if c == '#' && !anchor_started { - anchor_started = true; - continue; - } - if anchor_started { - anchor.push(c); - } - } - if !found { - return Err(Exit::SyntaxError { - content: line.to_string(), - reason: "Link parentheses not closed".to_string(), - line: line_num, - begin: pos, - end: pos, - }); - } - if !anchor.is_empty() { - // Remove whitespace from anchor - let anchor = anchor.replace(|c: char| c.is_whitespace(), ""); - anchors.push((anchor, line_num, pos)); - } - } else if !is_image { - // It's a reference link, collect the anchor - // Check for anchor like [](#anchor) - if chars.peek().map(|&(_, c)| c) == Some('(') { - chars.next(); // Skip '(' - if chars.peek().map(|&(_, c)| c) == Some('#') { - chars.next(); // Skip '#' - let mut anchor = String::new(); - while let Some(&(_, c)) = chars.peek() { - if c == ')' { - break; - } - anchor.push(c); - chars.next(); - } - if !anchor.is_empty() { - // Remove whitespace from anchor - let anchor = - anchor.replace(|c: char| c.is_whitespace(), ""); - anchors.push((anchor, line_num, pos)); - } - } - } - } - } else { - return Err(Exit::SyntaxError { - content: line.to_string(), - reason: "Unmatched 
']'".to_string(), - line: line_num, - begin: pos, - end: pos, - }); - } - } - '(' => { - // Check for standalone anchor like (#anchor) - if chars.peek().map(|&(_, c)| c) == Some('#') { - chars.next(); // Skip '#' - let mut anchor = String::new(); - while let Some(&(_, c)) = chars.peek() { - if c == ')' { - break; - } - anchor.push(c); - chars.next(); - } - if !anchor.is_empty() { - // Remove whitespace from anchor - let anchor = anchor.replace(|c: char| c.is_whitespace(), ""); - anchors.push((anchor, line_num, pos)); - } - } else { - stack.push(('('.to_string(), line_num, pos, false)); - } - } - ')' => { - if let Some((last, _l, b, _)) = stack.pop() { - if last != "(" { - return Err(Exit::SyntaxError { - content: line.to_string(), - reason: format!( - "Mismatched parenthesis: expected '(' but found '{}'", - last - ), - line: line_num, - begin: b, - end: pos, - }); - } - } else { - return Err(Exit::SyntaxError { - content: line.to_string(), - reason: "Unmatched ')'".to_string(), - line: line_num, - begin: pos, - end: pos, - }); - } - } - '`' => { - // Check for backticks - let mut count = 1; - while chars.peek().map(|&(_, c)| c) == Some('`') { - count += 1; - chars.next(); - } - let marker = "`".repeat(count); - - if let Some((last, _, _, _)) = stack.last() { - if last == &marker { - stack.pop(); - } else { - stack.push((marker.clone(), line_num, pos, false)); - } - } else { - stack.push((marker, line_num, pos, false)); - } - } - _ => {} - } - } - } - - // Check for unclosed brackets/parentheses - if let Some((last, line, begin, _)) = stack.pop() { - return Err(Exit::SyntaxError { - content: lines[(line - 1) as usize].to_string(), - reason: format!("Unclosed '{}'", last), - line, - begin, - end: begin, - }); - } - - Ok(()) -} diff --git a/parser/src/utils.rs b/parser/src/utils.rs deleted file mode 100644 index 0fbb516..0000000 --- a/parser/src/utils.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod path_fmt; diff --git a/parser/src/utils/path_fmt.rs 
b/parser/src/utils/path_fmt.rs deleted file mode 100644 index 8750db6..0000000 --- a/parser/src/utils/path_fmt.rs +++ /dev/null @@ -1,123 +0,0 @@ -use std::path::{Path, PathBuf}; - -/// Normalize an input path string into a canonical, platform‑agnostic form. -/// -/// This function removes ANSI escape sequences, unifies separators to `/`, -/// collapses duplicate slashes, strips unfriendly characters (`*`, `?`, `"`, `<`, `>`, `|`), -/// resolves simple `..` components, and preserves a trailing slash when present. -/// -/// See examples below for the exact normalization behavior. -/// -/// # Examples -/// -/// ``` -/// # use string_proc::format_path::format_path_str; -/// use std::io::Error; -/// -/// # fn main() -> Result<(), Error> { -/// assert_eq!(format_path_str("C:\\Users\\\\test")?, "C:/Users/test"); -/// assert_eq!( -/// format_path_str("/path/with/*unfriendly?chars")?, -/// "/path/with/unfriendlychars" -/// ); -/// assert_eq!(format_path_str("\x1b[31m/path\x1b[0m")?, "/path"); -/// assert_eq!(format_path_str("/home/user/dir/")?, "/home/user/dir/"); -/// assert_eq!( -/// format_path_str("/home/user/file.txt")?, -/// "/home/user/file.txt" -/// ); -/// assert_eq!( -/// format_path_str("/home/my_user/DOCS/JVCS_TEST/Workspace/../Vault/")?, -/// "/home/my_user/DOCS/JVCS_TEST/Vault/" -/// ); -/// assert_eq!(format_path_str("./home/file.txt")?, "home/file.txt"); -/// assert_eq!(format_path_str("./home/path/")?, "home/path/"); -/// assert_eq!(format_path_str("./")?, ""); -/// # Ok(()) -/// # } -/// ``` -pub fn format_path_str(path: impl Into) -> Result { - let path_str = path.into(); - let ends_with_slash = path_str.ends_with('/'); - - // ANSI Strip - let cleaned = strip_ansi_escapes::strip(&path_str); - let path_without_ansi = String::from_utf8(cleaned) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - - let path_with_forward_slash = path_without_ansi.replace('\\', "/"); - let mut result = String::new(); - let mut prev_char = '\0'; - - for 
c in path_with_forward_slash.chars() { - if c == '/' && prev_char == '/' { - continue; - } - result.push(c); - prev_char = c; - } - - let unfriendly_chars = ['*', '?', '"', '<', '>', '|']; - result = result - .chars() - .filter(|c| !unfriendly_chars.contains(c)) - .collect(); - - // Handle ".." path components - let path_buf = PathBuf::from(&result); - let normalized_path = normalize_path(&path_buf); - result = normalized_path.to_string_lossy().replace('\\', "/"); - - // Restore trailing slash if original path had one - if ends_with_slash && !result.ends_with('/') { - result.push('/'); - } - - // Special case: when result is only "./", return "" - if result == "./" { - return Ok(String::new()); - } - - Ok(result) -} - -/// Normalize path by resolving ".." components without requiring file system access -fn normalize_path(path: &Path) -> PathBuf { - let mut components = Vec::new(); - - for component in path.components() { - match component { - std::path::Component::ParentDir => { - if !components.is_empty() { - components.pop(); - } - } - std::path::Component::CurDir => { - // Skip current directory components - } - _ => { - components.push(component); - } - } - } - - if components.is_empty() { - PathBuf::from(".") - } else { - components.iter().collect() - } -} - -/// Format a [`PathBuf`] into its canonical string form and convert it back. -/// -/// This is a convenience wrapper around [`format_path_str`], preserving -/// the semantics of [`PathBuf`] while applying the same normalization rules: -/// - normalize separators to `/` -/// - remove duplicated separators -/// - strip ANSI escape sequences -/// - remove unfriendly characters (`*`, `?`, etc.) 
-/// - resolve simple `..` segments -pub fn format_path(path: impl Into) -> Result { - let path_str = format_path_str(path.into().display().to_string())?; - Ok(PathBuf::from(path_str)) -} diff --git a/src/lib.rs b/src/lib.rs index 728e349..584b845 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,9 +3,9 @@ // pub use markdialog_player::*; // } -// markdialog::parser -pub mod parser { - pub use markdialog_parser::*; +// markdialog::converter +pub mod converter { + pub use markdialog_converter::*; } // markdialog::res diff --git a/usage.txt b/usage.txt deleted file mode 100644 index 94d68b2..0000000 --- a/usage.txt +++ /dev/null @@ -1,6 +0,0 @@ -mdialogp -i -o - -i, --input Input file (Markdown / MarkDialog source) - -o, --output Output file (Generated IR / result) - - -h, --help Show help and exit - -v, --version Show version and exit diff --git a/version.txt b/version.txt deleted file mode 100644 index 6e8bf73..0000000 --- a/version.txt +++ /dev/null @@ -1 +0,0 @@ -0.1.0 -- cgit