diff options
| -rw-r--r-- | .gitignore | 5 | ||||
| -rw-r--r-- | Cargo.lock | 187 | ||||
| -rw-r--r-- | Cargo.toml | 16 | ||||
| -rw-r--r-- | LICENSE-MIT | 9 | ||||
| -rw-r--r-- | README.md | 107 | ||||
| -rw-r--r-- | README_zh_CN.md | 105 | ||||
| -rw-r--r-- | built_res/Cargo.toml | 6 | ||||
| -rw-r--r-- | built_res/src/lib.rs | 2 | ||||
| -rw-r--r-- | built_res/src/res_sentences.rs | 51 | ||||
| -rw-r--r-- | built_res/src/structs.rs | 1 | ||||
| -rw-r--r-- | built_res/src/structs/sentence.rs | 12 | ||||
| -rw-r--r-- | parser/Cargo.toml | 10 | ||||
| -rw-r--r-- | parser/src/bin/mdialogp.rs | 95 | ||||
| -rw-r--r-- | parser/src/error.rs | 106 | ||||
| -rw-r--r-- | parser/src/lib.rs | 4 | ||||
| -rw-r--r-- | parser/src/macros.rs | 33 | ||||
| -rw-r--r-- | parser/src/parse.rs | 918 | ||||
| -rw-r--r-- | parser/src/syntax_checker.rs | 221 | ||||
| -rw-r--r-- | player/Cargo.toml | 6 | ||||
| -rw-r--r-- | player/src/lib.rs | 1 | ||||
| -rw-r--r-- | src/lib.rs | 14 | ||||
| -rw-r--r-- | usage.txt | 6 | ||||
| -rw-r--r-- | version.txt | 1 |
23 files changed, 1916 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..08130ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/target +.temp +*.md +!README.md +!README_zh_CN.md diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..88733d7 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,187 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "built_res" +version = "0.0.0" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "markdialog" +version = "0.1.0" +dependencies = [ + "built_res", + "markdialog_parser", + "markdialog_player", +] + +[[package]] +name = "markdialog_parser" +version = "0.0.0" +dependencies = [ + "colored", + "regex", + "sha2", + "unicode-width", +] + +[[package]] +name = "markdialog_player" +version = "0.0.0" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ee8b721 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "markdialog" +version = "0.1.0" +edition = "2024" + +[workspace] +members = [ + "built_res", # Built Resources + "player", # Dialog Player + "parser" # Markdown Parser +] + +[dependencies] +built_res = { path = "built_res" } +markdialog_player = { path = "player" } +markdialog_parser = { path = "parser" } diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..4ff38a3 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,9 @@ +# The MIT License (MIT) + +Copyright © 2026 Weicao-CatilGrass + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software 
without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..3789d8f --- /dev/null +++ b/README.md @@ -0,0 +1,107 @@ +# MarkDialog + +> Write your story with Markdown! + +> [!WARNING] +> This article is translated from `README_zh_CN.md` + +> [!NOTE] +> This is a project I tinkered with in my spare time, and many aspects are still rough around the edges. +> +> I also experimentally used **Vibe Coding** in it, so you might encounter some unconventional code. Please bear with me. + +## Why MarkDialog? + +The starting point for this project was actually quite simple: I wanted to prototype an AVG visual novel and needed people around me to **be able to start writing the story immediately**. + +But the reality is: + +- Graphical editors are too heavy. +- Ink and Yarn Spinner are powerful, but both require learning. +- Writing in JSON / XML / custom DSLs isn't "natural" enough. + +What I really wanted was a format that people already know how to write, requires no learning, can be opened and written in immediately, and can be run right after writing. + +Then it suddenly hit me: + +### Isn't Markdown exactly that? 
+ +You can easily write it in **Typora**, **Obsidian**, **VS Code**, or even **Vim/NeoVim**. + +> What more could you ask for? + +When this idea popped into my head, I got really excited. + +And so, MarkDialog was born. + + + +## What is MarkDialog? + +It allows you to: + +- Use level-six headings to switch characters. +- Use regular Markdown for dialogue lines. +- Use lists for choices. +- Use links for jumps. +- Use code blocks for rich text controls like speed, color, and actions. +- Use images to change backgrounds. + +The Markdown you write is compiled into an IR (Intermediate Representation), which can then be parsed anywhere you want! + +For example: + +- Inline it into a Rust project. +- Import it into Unity / Unreal / Godot. +- Convert it to JSON. +- Convert it to anything. + + + +## MarkDialog Syntax? + +Generally speaking, if you know how to write Markdown, you know how to write MarkDialog. + +You can write: + +``` +# Title +> Use blockquotes for comments + +###### Zhang San +Use a level-six heading for **character names**. + +Use unordered lists for choices +- Good morning +- Good afternoon +- Good evening + +Ordered lists work too +1. Good morning +2. Good afternoon +3. Good evening + +> Use hyperlinks for jumps +What's for lunch? +- Pizza [](#Eat_Pizza) +- Pasta [](#Eat_Pasta) +- Nothing +Or *eat nothing at all*! + +## Eat_Pizza +Eat pizza + +## Eat_Pasta +Eat pasta + +> Use images to switch backgrounds + +``` + +It's that simple! + + + +## Open Source License + +Haha, I'm using the MIT License. Feel free to play around with it! diff --git a/README_zh_CN.md b/README_zh_CN.md new file mode 100644 index 0000000..d1da960 --- /dev/null +++ b/README_zh_CN.md @@ -0,0 +1,105 @@ +# MarkDialog + +> 用 Markdown 来书写您的剧情! + +> [!NOTE] +> 这是我闲暇时间折腾出来的项目,很多地方还没打磨好。 +> +> 另外我在里面实验性地使用了 **Vibe Coding**,所以你可能会看到一些不太正经的代码,请多包涵。 + +## 为什么会有 MarkDialog? 
+ +这个项目的起点其实很简单: 我想做一个 AVG 视觉小说的原型,需要让身边的人**能立刻上手写剧情**。 + +但现实是: + +- 图形化编辑器太重 +- Ink、Yarn Spinner 虽然强大,但都需要学习 +- JSON / XML / 自定义 DSL 写起来都不够 “自然” + +我真正想要的是一种大家已经会写、不需要学习、打开就能写、写完就能跑的格式。 + +然后我突然想到: + +### Markdown 不就是这样的吗? + +你可以在 **Typora**、**Obsidian**、**VS Code** 甚至 **Vim/NeoVim** 里轻松地编写他们 + +> 还要什么自行车? + +当这个念头冒出来的时候,我整个人都兴奋了。 + +于是,MarkDialog 就这样诞生了。 + + + +## MarkDialog 是什么? + +它让你可以: + +- 用六级标题切换角色 +- 用普通 Markdown 写台词 +- 用列表写选项 +- 用链接写跳转 +- 用代码块写速度、颜色、动作等富文本控制 +- 用图片切换背景 + +你写的 Markdown 会被编译成一个 IR,然后解析到任何地方,只要你想! + +比如 + +- 内联到 Rust 项目 +- 导入 Unity / Unreal / Godot +- 转换为 Json +- 转换为任何东西 + + + +## MarkDialog 的语法? + +一般来讲,只要你会写 Markdown,你就会写 Markdialog + +你可以: + +``` +# 标题 +> 使用引用块表示注释 + +###### 张三 +使用六级标题表示**角色名称** + +使用无序列表表示选项 +- 早上好 +- 中午好 +- 晚上好 + +有序也行 +1. 早上好 +2. 中午好 +3. 晚上好 + +> 使用超链接表示跳转 +中午吃? +- 披萨 [](#Eat_Pizza) +- 意面 [](#Eat_Pasta) +- 不吃 +或者 *什么都不吃*! + +## Eat_Pizza +吃披萨 + +## Eat_Pasta +吃意面 + +> 使用图像来切换背景 + +``` + +就是如此简单! + + + +## 开源协议 + +哈哈,我采用 MIT License,放心玩去吧! + diff --git a/built_res/Cargo.toml b/built_res/Cargo.toml new file mode 100644 index 0000000..b88a569 --- /dev/null +++ b/built_res/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "built_res" +workspaces.version = true +edition = "2024" + +[dependencies] diff --git a/built_res/src/lib.rs b/built_res/src/lib.rs new file mode 100644 index 0000000..bef5b44 --- /dev/null +++ b/built_res/src/lib.rs @@ -0,0 +1,2 @@ +pub mod res_sentences; +pub mod structs; diff --git a/built_res/src/res_sentences.rs b/built_res/src/res_sentences.rs new file mode 100644 index 0000000..8354ded --- /dev/null +++ b/built_res/src/res_sentences.rs @@ -0,0 +1,51 @@ +use crate::structs::sentence::{Sentence, Token}; + +#[derive(Hash, PartialEq, Eq)] +pub enum SentenceId { + // 在此处确认所有跳转点 + Main0, + Main1, + Main2, + Ok1, +} + +pub fn get_sentence(id: SentenceId) -> Option<Sentence<'static>> { + match id { + SentenceId::Main0 => Some(Sentence { + content_tokens: &[ + &Token::Text("你好我是"), + &Token::Command("red"), + 
&Token::Text("猫尾草"), + &Token::Command("/"), + ], + next_sentence: Some(SentenceId::Main1), + }), + SentenceId::Main1 => Some(Sentence { + content_tokens: &[ + &Token::Text("你好我是"), + &Token::Command("red"), + &Token::Text("猫尾草"), + &Token::Command("/"), + ], + next_sentence: Some(SentenceId::Main2), + }), + SentenceId::Main2 => Some(Sentence { + content_tokens: &[ + &Token::Text("你好我是"), + &Token::Command("red"), + &Token::Text("猫尾草"), + &Token::Command("/"), + ], + next_sentence: Some(SentenceId::Ok1), + }), + SentenceId::Ok1 => Some(Sentence { + content_tokens: &[ + &Token::Text("你好我是"), + &Token::Command("red"), + &Token::Text("猫尾草"), + &Token::Command("/"), + ], + next_sentence: None, + }), + } +} diff --git a/built_res/src/structs.rs b/built_res/src/structs.rs new file mode 100644 index 0000000..b7bb9ef --- /dev/null +++ b/built_res/src/structs.rs @@ -0,0 +1 @@ +pub mod sentence; diff --git a/built_res/src/structs/sentence.rs b/built_res/src/structs/sentence.rs new file mode 100644 index 0000000..270d5f7 --- /dev/null +++ b/built_res/src/structs/sentence.rs @@ -0,0 +1,12 @@ +pub struct Sentence<'a> { + pub content_tokens: &'a [&'static Token], + pub next_sentence: Option<crate::res_sentences::SentenceId>, +} + +pub enum Token { + Text(&'static str), + BoldText(&'static str), + ItalicText(&'static str), + BoldItalicText(&'static str), + Command(&'static str), +} diff --git a/parser/Cargo.toml b/parser/Cargo.toml new file mode 100644 index 0000000..07e462f --- /dev/null +++ b/parser/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "markdialog_parser" +workspaces.version = true +edition = "2024" + +[dependencies] +colored = "3.0" +unicode-width = "0.2" +regex = "1.12" +sha2 = "0.10" diff --git a/parser/src/bin/mdialogp.rs b/parser/src/bin/mdialogp.rs new file mode 100644 index 0000000..50ed06f --- /dev/null +++ b/parser/src/bin/mdialogp.rs @@ -0,0 +1,95 @@ +use markdialog_parser::{ + error::{Exit, handle_exit}, + parse::parse, + special_argument, special_flag, +}; 
+use std::{path::PathBuf, str::FromStr}; + +fn process() -> Result<(), Exit> { + let mut args: Vec<String> = std::env::args().skip(1).collect(); + + let help = special_flag!(args, "--help") || special_flag!(args, "-h"); + let version = special_flag!(args, "--version") || special_flag!(args, "-v"); + + if version { + let version = include_str!("../../../version.txt"); + println!("{}", version.trim()); + return Err(Exit::Code(0)); + } + + if help || args.len() < 1 { + let usage = include_str!("../../../usage.txt"); + println!("{}", usage.trim()); + return Err(Exit::Code(0)); + } + + let input_file = get_input_file(&mut args)?; + let output_ir_file = get_output_ir_file(&mut args)?.unwrap_or_else(|| { + let mut path = input_file.clone(); + if let Some(file_name) = path.file_name() { + let mut new_name = std::ffi::OsString::new(); + new_name.push(file_name); + // Change extension to .dialog + path.set_extension("dialog"); + } else { + path.set_file_name("ir.dialog"); + } + path + }); + + parse(input_file, output_ir_file)?; + + Ok(()) +} + +fn get_input_file(args: &mut Vec<String>) -> Result<PathBuf, Exit> { + let input = match special_argument!(args, "--input") { + Some(i) => i, + None => match special_argument!(args, "-i") { + Some(i) => i, + None => { + eprintln!("Missing required input argument. 
Use --input or -i."); + std::process::exit(2); + } + }, + }; + + let input_file = PathBuf::from_str(&input).map_err(|_| { + eprintln!("Invalid file path `{}`!", input); + Exit::Code(2) + })?; + + Ok(input_file) +} + +fn get_output_ir_file(args: &mut Vec<String>) -> Result<Option<PathBuf>, Exit> { + let input = match special_argument!(args, "--output") { + Some(i) => Some(i), + None => match special_argument!(args, "-o") { + Some(i) => Some(i), + None => None, + }, + }; + + match input { + Some(i) => { + let input_file = PathBuf::from_str(&i).map_err(|_| { + eprintln!("Invalid file path `{}`!", i); + return Exit::Code(2); + })?; + Ok(Some(input_file)) + } + None => Ok(None), + } +} + +fn main() { + // Init colored + #[cfg(windows)] + colored::control::set_virtual_terminal(true).unwrap(); + + match process() { + Ok(_) => {} + Err(e) => handle_exit(e), + } +} diff --git a/parser/src/error.rs b/parser/src/error.rs new file mode 100644 index 0000000..ca68442 --- /dev/null +++ b/parser/src/error.rs @@ -0,0 +1,106 @@ +use std::{i64, process::exit}; + +use colored::Colorize; +use unicode_width::UnicodeWidthStr; + +#[derive(Debug)] +pub enum Exit { + Code(i32), + IoError(std::io::Error), + SyntaxError { + content: String, + reason: String, + line: i64, + begin: i64, + end: i64, + }, +} + +impl From<std::io::Error> for Exit { + fn from(error: std::io::Error) -> Self { + Exit::IoError(error) + } +} + +pub fn handle_exit(e: Exit) { + match e { + Exit::Code(code) => exit(code), + Exit::IoError(error) => print_parse_error(error.to_string()), + Exit::SyntaxError { + content, + reason, + line, + begin, + end, + } => { + print_syntax_error(content, reason, line, begin, end); + } + } +} + +fn print_parse_error(content: impl AsRef<str>) { + eprintln!("Parse Error !"); + eprintln!("{}", content.as_ref().trim()); + exit(1); +} + +macro_rules! 
line { + ($line:expr, $N:expr) => { + if $line + $N <= 0 { + " ".to_string() + } else { + ($line + $N).to_string() + } + }; +} + +pub fn print_syntax_error(content: String, reason: String, line: i64, begin: i64, end: i64) { + let content_len = content.width() as i64; + let end = end.clamp(begin, content_len); + + eprintln!("{}", "Parse Failed: Syntax Error".bright_yellow()); + eprintln!("{}{}", line!(line, -1), "|"); + + let before: String = content.chars().take(begin.max(0) as usize).collect(); + let highlight_len = (end - begin).max(1) as usize; + let highlight: String = content + .chars() + .skip(begin.max(0) as usize) + .take(highlight_len) + .collect(); + let after: String = content + .chars() + .skip((begin.max(0) + highlight_len as i64) as usize) + .collect(); + + eprintln!( + "{}{} {}{}{}", + line.to_string().cyan(), + "|".cyan(), + before.cyan(), + highlight.bright_cyan(), + after.cyan() + ); + + let prefix_chars: String = content.chars().take(begin.max(0) as usize).collect(); + let prefix_width = prefix_chars.width() as usize; + + eprintln!( + "{}{} {}", + line!(line, 1), + "|", + format!( + "{}{}____ {}", + " ".repeat(prefix_width), + "^".repeat(((end - begin).max(1)) as usize), + reason + ) + .bright_cyan() + ); + eprintln!("{}{}", line!(line, 2), "|"); + eprintln!( + "{}", + "Please fix the issue and run the program again".bright_yellow() + ); + exit(1); +} diff --git a/parser/src/lib.rs b/parser/src/lib.rs new file mode 100644 index 0000000..a462697 --- /dev/null +++ b/parser/src/lib.rs @@ -0,0 +1,4 @@ +pub mod error; +pub mod macros; +pub mod parse; +pub mod syntax_checker; diff --git a/parser/src/macros.rs b/parser/src/macros.rs new file mode 100644 index 0000000..894b3f4 --- /dev/null +++ b/parser/src/macros.rs @@ -0,0 +1,33 @@ +#[macro_export] +macro_rules! 
/// Pulls the value of a `flag value` pair out of an argument list.
///
/// Finds the first element of `$args` equal to `$flag`, removes it, and
/// - evaluates to `Some(value)` (also removing the value) when another
///   argument follows the flag;
/// - evaluates to `None` when the flag is the last argument (the flag is
///   still removed) or when the flag does not occur at all.
///
/// Only the first occurrence is consumed; later duplicates stay in `$args`.
#[macro_export]
macro_rules! special_argument {
    ($args:expr, $flag:expr) => {{
        let flag = $flag;
        match $args.iter().position(|arg| arg == flag) {
            Some(idx) => {
                // Drop the flag itself; its value (if any) slides into `idx`.
                $args.remove(idx);
                if idx < $args.len() {
                    Some($args.remove(idx))
                } else {
                    None
                }
            }
            None => None,
        }
    }};
}
Trim each line +pub fn clean_markdown(i: String) -> Result<String, Exit> { + let lines = i.lines(); + let mut cleaned = Vec::new(); + + for line in lines { + if line.starts_with('>') { + continue; + } + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + cleaned.push(trimmed.to_string()); + } + + Ok(cleaned.join("\n")) +} + +#[cfg(test)] +mod test_clean_markdown { + use super::*; + + #[test] + fn test_clean_markdown_removes_blockquotes() { + let input = "> This is a blockquote\nNormal text\n> Another blockquote".to_string(); + let expected = "Normal text".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_removes_empty_lines() { + let input = "Line 1\n\n\nLine 2\n\n".to_string(); + let expected = "Line 1\nLine 2".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_trims_lines() { + let input = " Line 1 \n\tLine 2\t\n".to_string(); + let expected = "Line 1\nLine 2".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_combined() { + let input = "> Blockquote\n\n Line 1 \n> Another\n\nLine 2\n\n".to_string(); + let expected = "Line 1\nLine 2".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_empty_input() { + let input = "".to_string(); + let expected = "".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_only_blockquotes() { + let input = "> Quote 1\n> Quote 2".to_string(); + let expected = "".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + 
assert_eq!(result, expected); + } + + #[test] + fn test_clean_markdown_only_whitespace() { + let input = " \n\t\n ".to_string(); + let expected = "".to_string(); + let Ok(result) = clean_markdown(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } +} + +/// Fix jump syntax in each line +/// 1. Correct the following syntax +/// ```ignore +/// - It's [Item](#Mark) +/// > corrected to +/// - It's Item [](#Mark) +/// ``` +/// +/// 2. If there are multiple options, take the first one +/// ```ignore +/// - There might be two options: [A](#A) and [B](#B)! +/// > corrected to +/// - There might be two options: A and B! [](#A) +/// ``` +pub fn fix_mark_jump(i: String) -> Result<String, Exit> { + let mut result = String::new(); + + for line in i.lines() { + let (processed_content, first_link_dest) = helper_process_line_content(line); + let processed_line = helper_format_line_with_link(processed_content, first_link_dest); + let final_line = helper_convert_ordered_list_marker(processed_line); + + result.push_str(&final_line); + result.push('\n'); + } + + if result.ends_with('\n') { + result.pop(); + } + + Ok(result) +} + +/// Process line content, extract link text and return the first link target +/// +/// # Examples +/// +/// ``` +/// use markdialog_parser::parse::helper_process_line_content; +/// +/// // Single link +/// let (content, dest) = helper_process_line_content("This is a [Link](#target) Example"); +/// assert_eq!(content, "This is a Link Example"); +/// assert_eq!(dest, Some("target".to_string())); +/// +/// // Extract the first link +/// let (content, dest) = helper_process_line_content("First [link1](#target1) and second [link2](#target2)"); +/// assert_eq!(content, "First link1 and second link2"); +/// assert_eq!(dest, Some("target1".to_string())); +/// +/// // No link +/// let (content, dest) = helper_process_line_content("Text without link"); +/// assert_eq!(content, "Text without link"); +/// assert_eq!(dest, None); +/// +/// // 
Invalid link +/// let (content, dest) = helper_process_line_content("Invalid [link format"); +/// assert_eq!(content, "Invalid ["); +/// assert_eq!(dest, None); +/// +/// // Empty +/// let (content, dest) = helper_process_line_content(""); +/// assert_eq!(content, ""); +/// assert_eq!(dest, None); +/// +/// // Link target contains spaces and extra # symbols +/// let (content, dest) = helper_process_line_content("Link[text](# target#)"); +/// assert_eq!(content, "Linktext"); +/// assert_eq!(dest, Some("target".to_string())); +/// ``` +pub fn helper_process_line_content(line: &str) -> (String, Option<String>) { + // Check if line is an image line (starts with "".chars().peekable(); +/// chars.next(); // Skip '[' +/// let result = helper_parse_link(&mut chars); +/// assert!(result.is_some()); +/// let (text, dest, _) = result.unwrap(); +/// assert_eq!(text, "Link"); +/// assert_eq!(dest, "target"); +/// +/// // Link text contains spaces +/// let mut chars = "[Link text](#target)".chars().peekable(); +/// chars.next(); +/// let result = helper_parse_link(&mut chars); +/// assert!(result.is_some()); +/// let (text, dest, _) = result.unwrap(); +/// assert_eq!(text, "Link text"); +/// assert_eq!(dest, "target"); +/// +/// // Link target contains spaces and extra # symbols +/// let mut chars = "[text](# target#)".chars().peekable(); +/// chars.next(); +/// let result = helper_parse_link(&mut chars); +/// assert!(result.is_some()); +/// let (text, dest, _) = result.unwrap(); +/// assert_eq!(text, "text"); +/// assert_eq!(dest, "target"); +/// +/// // Invalid format: missing ']' +/// let mut chars = "[Link(#target)".chars().peekable(); +/// chars.next(); +/// let result = helper_parse_link(&mut chars); +/// assert!(result.is_none()); +/// +/// // Invalid format: missing '(#' +/// let mut chars = "[Link]target)".chars().peekable(); +/// chars.next(); +/// let result = helper_parse_link(&mut chars); +/// assert!(result.is_none()); +/// +/// // Invalid format: missing ')' +/// 
/// Parse the remainder of an inline jump link written as `[text](#dest)`.
///
/// The caller must already have consumed the opening `[`. `chars` is advanced
/// past every character this function reads.
///
/// Returns `Some((text, dest, rest))`:
/// - `text` is everything up to (not including) the closing `]`;
/// - `dest` is the destination with surrounding whitespace trimmed and every
///   space and `#` removed;
/// - `rest` is a clone of the iterator positioned right after the link.
///
/// Returns `None` when the link text is not immediately followed by `(#`
/// (which also covers a missing `]`). A missing closing `)` is tolerated: the
/// destination then runs to the end of the input.
///
/// # Examples
///
/// ```
/// use markdialog_parser::parse::helper_parse_link;
///
/// let mut chars = "[Link text](# target#)".chars().peekable();
/// chars.next(); // Skip '['
/// let (text, dest, _) = helper_parse_link(&mut chars).unwrap();
/// assert_eq!(text, "Link text");
/// assert_eq!(dest, "target");
///
/// // Missing '(#' after the text
/// let mut chars = "[Link]target)".chars().peekable();
/// chars.next();
/// assert!(helper_parse_link(&mut chars).is_none());
///
/// // Missing ')' is accepted
/// let mut chars = "[Link](#target".chars().peekable();
/// chars.next();
/// let (text, dest, _) = helper_parse_link(&mut chars).unwrap();
/// assert_eq!(text, "Link");
/// assert_eq!(dest, "target");
/// ```
pub fn helper_parse_link<'a>(
    chars: &mut std::iter::Peekable<std::str::Chars<'a>>,
) -> Option<(String, String, std::iter::Peekable<std::str::Chars<'a>>)> {
    // `take_while` also swallows the terminating ']' (or drains the input
    // when there is none), leaving the cursor on whatever follows.
    let text: String = chars.by_ref().take_while(|&c| c != ']').collect();

    // The destination must open with exactly "(#".
    if chars.next() != Some('(') {
        return None;
    }
    if chars.next() != Some('#') {
        return None;
    }

    // Read up to the closing ')' (consumed) or to the end of the input.
    let raw_dest: String = chars.by_ref().take_while(|&c| c != ')').collect();

    let dest: String = raw_dest
        .trim()
        .chars()
        .filter(|&c| c != ' ' && c != '#')
        .collect();

    Some((text, dest, chars.clone()))
}
/// Append a jump marker `[](#dest)` to the end of a line when a destination exists.
///
/// When `link_dest` is `None`, empty, or whitespace-only, `content` is returned
/// untouched. Otherwise trailing whitespace is removed from `content`, the
/// trimmed destination is appended as ` [](#dest)`, and the combined string is
/// trimmed on both ends (so empty content yields just the marker).
///
/// # Examples
///
/// ```
/// use markdialog_parser::parse::helper_format_line_with_link;
///
/// let result = helper_format_line_with_link("Some content".to_string(), Some("target".to_string()));
/// assert_eq!(result, "Some content [](#target)");
///
/// let result = helper_format_line_with_link("".to_string(), Some("target".to_string()));
/// assert_eq!(result, "[](#target)");
///
/// let result = helper_format_line_with_link("Some content".to_string(), None);
/// assert_eq!(result, "Some content");
///
/// let result = helper_format_line_with_link("Some content".to_string(), Some(" ".to_string()));
/// assert_eq!(result, "Some content");
/// ```
pub fn helper_format_line_with_link(content: String, link_dest: Option<String>) -> String {
    // A missing destination is treated like an empty one.
    let dest = link_dest.as_deref().map(str::trim).unwrap_or("");
    if dest.is_empty() {
        return content;
    }

    let with_marker = format!("{} [](#{})", content.trim_end(), dest);
    with_marker.trim().to_string()
}
/// Turn an ordered-list marker (`1. `, `10. `, ...) into the unordered marker `- `.
///
/// Leading whitespace is ignored when looking for the marker and is dropped
/// from the converted line. A marker is one or more ASCII digits followed by
/// exactly `". "`; only that prefix is replaced, the remainder of the line is
/// kept verbatim. Lines without such a marker are returned unchanged
/// (including their leading whitespace).
///
/// # Examples
///
/// ```
/// use markdialog_parser::parse::helper_convert_ordered_list_marker;
///
/// assert_eq!(helper_convert_ordered_list_marker("1. First item".to_string()), "- First item");
/// assert_eq!(helper_convert_ordered_list_marker("10. Tenth item".to_string()), "- Tenth item");
/// assert_eq!(helper_convert_ordered_list_marker(" 2. Second item".to_string()), "- Second item");
/// assert_eq!(helper_convert_ordered_list_marker("1.Not a list".to_string()), "1.Not a list");
/// assert_eq!(helper_convert_ordered_list_marker("- Already unordered".to_string()), "- Already unordered");
/// assert_eq!(helper_convert_ordered_list_marker("".to_string()), "");
/// ```
pub fn helper_convert_ordered_list_marker(line: String) -> String {
    let body = line.trim_start();

    // ASCII digits are one byte each, so the count doubles as a byte offset.
    let digits = body.bytes().take_while(u8::is_ascii_digit).count();

    if digits > 0 {
        if let Some(item) = body[digits..].strip_prefix(". ") {
            return format!("- {}", item);
        }
    }

    line
}
/// Unit tests for `fix_mark_jump`: inline `[text](#dest)` links are flattened
/// to their text and the first destination is re-attached as a trailing
/// `[](#dest)` marker; ordered list markers become `- `.
#[cfg(test)]
mod test_fix_mark_jump {
    use super::*;

    /// Run `fix_mark_jump` on `input` and compare against `expected`,
    /// panicking with "Parse error!" when the function reports an error.
    fn assert_fix_mark_jump(input: &str, expected: &str) {
        let Ok(result) = fix_mark_jump(input.to_string()) else {
            panic!("Parse error!");
        };
        assert_eq!(result, expected.to_string());
    }

    #[test]
    fn test_fix_mark_jump_single_link() {
        assert_fix_mark_jump("- It's [Item](#Mark)", "- It's Item [](#Mark)");
    }

    #[test]
    fn test_fix_mark_jump_multiple_links_takes_first() {
        assert_fix_mark_jump(
            "- There might be two options: [A](#A) and [B](#B)!",
            "- There might be two options: A and B! [](#A)",
        );
    }

    #[test]
    fn test_fix_mark_jump_no_link() {
        assert_fix_mark_jump("- Just a normal line", "- Just a normal line");
    }

    #[test]
    fn test_fix_mark_jump_empty_line() {
        assert_fix_mark_jump("", "");
    }

    #[test]
    fn test_fix_mark_jump_multiple_lines() {
        assert_fix_mark_jump(
            "- First [Item](#First)\n- Second [Item](#Second)",
            "- First Item [](#First)\n- Second Item [](#Second)",
        );
    }

    #[test]
    fn test_fix_mark_jump_link_at_end() {
        assert_fix_mark_jump("- End with [link](#target)", "- End with link [](#target)");
    }

    #[test]
    fn test_fix_mark_jump_link_at_beginning() {
        assert_fix_mark_jump(
            "- [Start](#target) with link",
            "- Start with link [](#target)",
        );
    }

    #[test]
    fn test_fix_mark_jump_link_in_middle() {
        assert_fix_mark_jump(
            "- Text [middle](#target) text",
            "- Text middle text [](#target)",
        );
    }

    #[test]
    fn test_fix_mark_jump_ordered_list_conversion() {
        assert_fix_mark_jump("1. [Item](#target)", "- Item [](#target)");
    }

    #[test]
    fn test_fix_mark_jump_ordered_list_multiple_digits() {
        assert_fix_mark_jump("10. [Tenth](#target) item", "- Tenth item [](#target)");
    }

    #[test]
    fn test_fix_mark_jump_mixed_ordered_and_unordered() {
        assert_fix_mark_jump(
            "1. [First](#first)\n- [Second](#second)\n2. [Third](#third)",
            "- First [](#first)\n- Second [](#second)\n- Third [](#third)",
        );
    }

    #[test]
    fn test_fix_mark_jump_invalid_link_format() {
        assert_fix_mark_jump("- Invalid [link format", "- Invalid [");
    }

    #[test]
    fn test_fix_mark_jump_link_with_spaces_in_target() {
        assert_fix_mark_jump("- Link [text](# target#)", "- Link text [](#target)");
    }

    #[test]
    fn test_fix_mark_jump_empty_link_text() {
        assert_fix_mark_jump("- [](#target)", "- [](#target)");
    }

    #[test]
    fn test_fix_mark_jump_only_whitespace() {
        assert_fix_mark_jump(" ", " ");
    }

    #[test]
    fn test_fix_mark_jump_complex_multiple_links() {
        assert_fix_mark_jump(
            "- Choose [A](#A), [B](#B), or [C](#C)!",
            "- Choose A, B, or C! [](#A)",
        );
    }
}
[A](#A), [B](#B), or [C](#C)!".to_string(); + let expected = "- Choose A, B, or C! [](#A)".to_string(); + let Ok(result) = fix_mark_jump(input) else { + panic!("Parse error!"); + }; + assert_eq!(result, expected); + } +} + +/// Replace marker names: replace heading text and link anchors with corresponding SHA256 +/// +/// Example: +/// ```ignore +/// # Original text +/// # Chapter Title +/// - Jump to [Chapter Title](#Chapter Title) +/// +/// # After processing +/// # a1b2c3d4 +/// - Jump to [](#a1b2c3d4) +/// ``` +pub fn replace_marker_name(i: String) -> Result<String, Exit> { + let mut result = i; + + let heading_re = Regex::new(r"^(#{1,5})\s+(.+)$").unwrap(); + let mut heading_map = std::collections::HashMap::new(); + + for line in result.lines() { + if let Some(caps) = heading_re.captures(line) { + let heading_text = caps[2].trim().to_string(); + let hash = format!("{:x}", Sha256::digest(heading_text.as_bytes())); + let short_hash = &hash[..8]; + heading_map.insert(heading_text, short_hash.to_string()); + } + } + + let mut lines: Vec<String> = Vec::new(); + for line in result.lines() { + if let Some(caps) = heading_re.captures(line) { + let level = &caps[1]; + let heading_text = caps[2].trim(); + + if let Some(hash) = heading_map.get(heading_text) { + lines.push(format!("{} {}", level, hash)); + } else { + lines.push(line.to_string()); + } + } else { + lines.push(line.to_string()); + } + } + result = lines.join("\n"); + + let link_re = Regex::new(r"\[\]\(#([^)]+)\)").unwrap(); + result = link_re + .replace_all(&result, |caps: ®ex::Captures| { + let anchor_name = &caps[1]; + if let Some(hash) = heading_map.get(anchor_name) { + format!("[](#{})", hash) + } else { + let hash = format!("{:x}", Sha256::digest(anchor_name.as_bytes())); + let short_hash = &hash[..8]; + format!("[](#{})", short_hash) + } + }) + .to_string(); + + Ok(result) +} + +/// Split content into Step + Sentence structure +pub fn convert_to_step_sentence_structure(input: String) -> Result<String, 
Exit> { + let mut result = String::new(); + let mut current_marker = String::new(); + let mut current_step_id = 0; + let mut current_character = String::new(); + let mut has_no_switch_flag = false; + + let mut code_record_mode = false; + let mut option_record_mode = false; + + let mut sentences_buffer = String::new(); + for line in input.split("\n") { + // Record code + if code_record_mode { + // If code block marker is found again, end code recording + if line.starts_with("```") && code_record_mode { + sentences_buffer.push_str("`\n"); + code_record_mode = false; + continue; + } + sentences_buffer.push_str(format!("{}\\n", line).as_str()); + continue; + } + + // Record options + if option_record_mode { + // Still an option, continue appending + if line.starts_with("- ") { + let (sentence, next) = helper_get_jump_from_line(line); + let next = if let Some(next) = next { + format!("->[#{}_0]", next) + } else { + next_flag(current_marker.as_str(), current_step_id) + }; + let option_line = format!( + "{}[{}]{}", + character(¤t_character, has_no_switch_flag), + sentence, + next + ); + sentences_buffer.push_str(option_line.as_str()); + sentences_buffer.push('\n'); + continue; + } else { + // When ending option recording, create and advance one Step + result.push_str(step_line(current_marker.as_str(), current_step_id).as_str()); + result.push('\n'); + result.push_str(sentences_buffer.as_str()); + sentences_buffer.clear(); + current_step_id += 1; + // Clean "Has no switch flag" + has_no_switch_flag = false; + // Close option mode + option_record_mode = false; + // Do not continue here, proceed to process subsequent content + } + } + + // Refresh heading + if helper_is_marker(line) { + current_marker = helper_read_maker(line).to_string(); + current_step_id = 0; + continue; + } + + // Refresh character + if helper_is_character(line) { + let (character, no_switch_flag) = helper_read_character(line); + current_character = character.to_string(); + has_no_switch_flag = 
/// Build the speaker prefix of a sentence line: `[name]:`, or `[*name*]:`
/// when the "no switch" flag is set.
pub fn character(character: &str, has_no_switch_flag: bool) -> String {
    let star = if has_no_switch_flag { "*" } else { "" };

    let mut prefix = String::with_capacity(character.len() + 2 * star.len() + 3);
    prefix.push('[');
    prefix.push_str(star);
    prefix.push_str(character);
    prefix.push_str(star);
    prefix.push_str("]:");
    prefix
}

/// Name of a step: `<marker>_<id>`.
pub fn step_name(marker: &str, current_id: i64) -> String {
    let mut name = String::from(marker);
    name.push('_');
    name.push_str(&current_id.to_string());
    name
}

/// Header line that opens a step: ten `@` characters, a space, then
/// `<marker>_<id>`.
pub fn step_line(marker: &str, current_id: i64) -> String {
    let mut header = String::from("@@@@@@@@@@ ");
    header.push_str(marker);
    header.push('_');
    header.push_str(&current_id.to_string());
    header
}
/// Jump suffix pointing at the step that follows `current_id` under the same
/// marker: `->[#<marker>_<current_id + 1>]`.
pub fn next_flag(marker: &str, current_id: i64) -> String {
    format!("->[#{}_{}]", marker, current_id + 1)
}

/// Whether `line` is a marker heading: one to five `#` at the very start of
/// the line, immediately followed by a space.
pub fn helper_is_marker(line: &str) -> bool {
    let hashes = line.bytes().take_while(|&b| b == b'#').count();
    (1..=5).contains(&hashes) && line.as_bytes().get(hashes) == Some(&b' ')
}

/// Read the marker name from a heading line.
///
/// Leading whitespace is ignored. Returns the trimmed text after the first
/// space of a level 1–5 heading, or `""` when the line is not such a heading.
pub fn helper_read_maker(line: &str) -> &str {
    let trimmed = line.trim_start();
    if !helper_is_marker(trimmed) {
        return "";
    }
    trimmed
        .split_once(' ')
        .map_or("", |(_, title)| title.trim())
}

/// Whether `line` is a character line, i.e. starts with at least six `#`.
pub fn helper_is_character(line: &str) -> bool {
    line.starts_with("######")
}

/// Read the character name and the "no switch" flag from a character line.
///
/// Leading whitespace is ignored. The name is the trimmed text after the
/// first space. A name wrapped in `*…*` has the asterisks removed and sets the
/// flag to `true`. Returns `("", false)` when the line is not a character line
/// or contains no space.
pub fn helper_read_character(line: &str) -> (&str, bool) {
    let trimmed = line.trim_start();
    if !trimmed.starts_with("######") {
        return ("", false);
    }
    let Some((_, name)) = trimmed.split_once(' ') else {
        return ("", false);
    };

    let name = name.trim();
    if name.starts_with('*') && name.ends_with('*') {
        (name.trim_matches('*').trim(), true)
    } else {
        (name, false)
    }
}

/// Split a line into its sentence text and an optional trailing jump target.
///
/// A jump is a `[](#target)` marker at the very end of the line (trailing
/// whitespace ignored) whose target is non-empty and contains no `)`.
///
/// Returns `(sentence, Some(target))` when such a marker exists: the marker is
/// cut off, leading `"- "` list prefixes are stripped and trailing whitespace
/// is removed from the sentence. Otherwise returns the line with only the
/// leading `"- "` prefixes stripped, and `None`.
///
/// Only the trailing marker is removed; identical markers earlier in the line
/// are left in place.
pub fn helper_get_jump_from_line(line: &str) -> (String, Option<String>) {
    const JUMP_OPEN: &str = "[](#";

    let trimmed = line.trim_end();
    if let Some(body) = trimmed.strip_suffix(')') {
        // The target cannot contain ')', so the marker has to start after the
        // last ')' that precedes the closing one.
        let search_from = body.rfind(')').map_or(0, |idx| idx + 1);
        if let Some(offset) = body[search_from..].find(JUMP_OPEN) {
            let marker_start = search_from + offset;
            let target = &body[marker_start + JUMP_OPEN.len()..];
            if !target.is_empty() {
                // Strip the list prefix before trimming so that a bare
                // "- [](#target)" yields an empty sentence rather than "-".
                let sentence = trimmed[..marker_start]
                    .trim_start_matches("- ")
                    .trim_end()
                    .to_string();
                return (sentence, Some(target.to_string()));
            }
        }
    }

    (line.trim_start_matches("- ").to_string(), None)
}
std::collections::HashSet::new(); + + for line in &lines { + if line.starts_with("@@@@@@@@@@ ") { + let id = line.trim_start_matches("@@@@@@@@@@ ").trim(); + valid_ids.insert(id.to_string()); + } + } + + let mut result_lines = Vec::new(); + let link_re = Regex::new(r"\[#([^)]+)\]").unwrap(); + + for line in lines { + let processed_line = link_re.replace_all(line, |caps: ®ex::Captures| { + let id = &caps[1]; + if valid_ids.contains(id) { + format!("[#{}]", id) + } else { + "[]".to_string() + } + }); + result_lines.push(processed_line.to_string()); + } + + Ok(result_lines.join("\n")) +} diff --git a/parser/src/syntax_checker.rs b/parser/src/syntax_checker.rs new file mode 100644 index 0000000..c8e654c --- /dev/null +++ b/parser/src/syntax_checker.rs @@ -0,0 +1,221 @@ +use crate::error::Exit; + +pub fn check_markdown_syntax(i: &String) -> Result<(), Exit> { + let mut stack = Vec::new(); + let lines: Vec<&str> = i.lines().collect(); + let mut anchors = Vec::new(); + let mut heading_ids = Vec::new(); + + for (line_num, line) in lines.iter().enumerate() { + let line_num = line_num as i64 + 1; + + // Check for headings to collect anchor IDs + if line.starts_with('#') { + let heading_text = line.trim_start_matches('#').trim(); + let id = heading_text + .to_lowercase() + .chars() + .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_') + .collect::<String>(); + if !id.is_empty() { + heading_ids.push(id); + } + } + + let mut chars = line.chars().enumerate().peekable(); + while let Some((pos, ch)) = chars.next() { + let pos = pos as i64 + 1; + + match ch { + '[' => { + // Check if it's a link or image + let is_image = chars.peek().map(|&(_, c)| c) == Some('!'); + if is_image { + chars.next(); // Skip '!' 
+ } + stack.push(('['.to_string(), line_num, pos, is_image)); + } + ']' => { + if let Some((last, _l, b, is_image)) = stack.pop() { + if last != "[" { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: format!( + "Mismatched bracket: expected '[' but found '{}'", + last + ), + line: line_num, + begin: b, + end: pos, + }); + } + // Check if it's followed by '(' for a link + if chars.peek().map(|&(_, c)| c) == Some('(') { + chars.next(); // Skip '(' + // Look for closing ')' + let mut found = false; + let mut anchor_started = false; + let mut anchor = String::new(); + while let Some((_, c)) = chars.next() { + if c == ')' { + found = true; + break; + } + if c == '#' && !anchor_started { + anchor_started = true; + continue; + } + if anchor_started { + anchor.push(c); + } + } + if !found { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: "Link parentheses not closed".to_string(), + line: line_num, + begin: pos, + end: pos, + }); + } + if !anchor.is_empty() { + // Remove whitespace from anchor + let anchor = anchor.replace(|c: char| c.is_whitespace(), ""); + anchors.push((anchor, line_num, pos)); + } + } else if !is_image { + // It's a reference link, collect the anchor + // Check for anchor like [](#anchor) + if chars.peek().map(|&(_, c)| c) == Some('(') { + chars.next(); // Skip '(' + if chars.peek().map(|&(_, c)| c) == Some('#') { + chars.next(); // Skip '#' + let mut anchor = String::new(); + while let Some(&(_, c)) = chars.peek() { + if c == ')' { + break; + } + anchor.push(c); + chars.next(); + } + if !anchor.is_empty() { + // Remove whitespace from anchor + let anchor = + anchor.replace(|c: char| c.is_whitespace(), ""); + anchors.push((anchor, line_num, pos)); + } + } + } + } + } else { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: "Unmatched ']'".to_string(), + line: line_num, + begin: pos, + end: pos, + }); + } + } + '(' => { + // Check for standalone anchor like (#anchor) + if 
chars.peek().map(|&(_, c)| c) == Some('#') { + chars.next(); // Skip '#' + let mut anchor = String::new(); + while let Some(&(_, c)) = chars.peek() { + if c == ')' { + break; + } + anchor.push(c); + chars.next(); + } + if !anchor.is_empty() { + // Remove whitespace from anchor + let anchor = anchor.replace(|c: char| c.is_whitespace(), ""); + anchors.push((anchor, line_num, pos)); + } + } else { + stack.push(('('.to_string(), line_num, pos, false)); + } + } + ')' => { + if let Some((last, _l, b, _)) = stack.pop() { + if last != "(" { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: format!( + "Mismatched parenthesis: expected '(' but found '{}'", + last + ), + line: line_num, + begin: b, + end: pos, + }); + } + } else { + return Err(Exit::SyntaxError { + content: line.to_string(), + reason: "Unmatched ')'".to_string(), + line: line_num, + begin: pos, + end: pos, + }); + } + } + '`' => { + // Check for backticks + let mut count = 1; + while chars.peek().map(|&(_, c)| c) == Some('`') { + count += 1; + chars.next(); + } + let marker = "`".repeat(count); + + if let Some((last, _, _, _)) = stack.last() { + if last == &marker { + stack.pop(); + } else { + stack.push((marker.clone(), line_num, pos, false)); + } + } else { + stack.push((marker, line_num, pos, false)); + } + } + _ => {} + } + } + } + + // Check for unclosed brackets/parentheses + if let Some((last, line, begin, _)) = stack.pop() { + return Err(Exit::SyntaxError { + content: lines[(line - 1) as usize].to_string(), + reason: format!("Unclosed '{}'", last), + line, + begin, + end: begin, + }); + } + + // Check if all anchors have corresponding headings + for (anchor, line_num, pos) in anchors { + // Normalize anchor for comparison: convert to lowercase and filter characters + let normalized_anchor = anchor + .to_lowercase() + .chars() + .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_') + .collect::<String>(); + + if !heading_ids.contains(&normalized_anchor) { + return 
Err(Exit::SyntaxError { + content: lines[(line_num - 1) as usize].to_string(), + reason: format!("Anchor '#{}' has no corresponding heading", anchor), + line: line_num, + begin: pos, + end: pos + anchor.len() as i64, + }); + } + } + + Ok(()) +} diff --git a/player/Cargo.toml b/player/Cargo.toml new file mode 100644 index 0000000..192dcfa --- /dev/null +++ b/player/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "markdialog_player" +workspaces.version = true +edition = "2024" + +[dependencies] diff --git a/player/src/lib.rs b/player/src/lib.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/player/src/lib.rs @@ -0,0 +1 @@ + diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..728e349 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,14 @@ +// markdialog::player +// pub mod player { +// pub use markdialog_player::*; +// } + +// markdialog::parser +pub mod parser { + pub use markdialog_parser::*; +} + +// markdialog::res +pub mod res { + pub use built_res::*; +} diff --git a/usage.txt b/usage.txt new file mode 100644 index 0000000..94d68b2 --- /dev/null +++ b/usage.txt @@ -0,0 +1,6 @@ +mdialogp -i <FILE> -o <FILE> + -i, --input <FILE> Input file (Markdown / MarkDialog source) + -o, --output <FILE> Output file (Generated IR / result) + + -h, --help Show help and exit + -v, --version Show version and exit diff --git a/version.txt b/version.txt new file mode 100644 index 0000000..6e8bf73 --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +0.1.0 |
