summaryrefslogtreecommitdiff
path: root/parser/src/syntax_checker.rs
diff options
context:
space:
mode:
Diffstat (limited to 'parser/src/syntax_checker.rs')
-rw-r--r--parser/src/syntax_checker.rs221
1 files changed, 221 insertions, 0 deletions
diff --git a/parser/src/syntax_checker.rs b/parser/src/syntax_checker.rs
new file mode 100644
index 0000000..c8e654c
--- /dev/null
+++ b/parser/src/syntax_checker.rs
@@ -0,0 +1,221 @@
+use crate::error::Exit;
+
+pub fn check_markdown_syntax(i: &String) -> Result<(), Exit> {
+ let mut stack = Vec::new();
+ let lines: Vec<&str> = i.lines().collect();
+ let mut anchors = Vec::new();
+ let mut heading_ids = Vec::new();
+
+ for (line_num, line) in lines.iter().enumerate() {
+ let line_num = line_num as i64 + 1;
+
+ // Check for headings to collect anchor IDs
+ if line.starts_with('#') {
+ let heading_text = line.trim_start_matches('#').trim();
+ let id = heading_text
+ .to_lowercase()
+ .chars()
+ .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_')
+ .collect::<String>();
+ if !id.is_empty() {
+ heading_ids.push(id);
+ }
+ }
+
+ let mut chars = line.chars().enumerate().peekable();
+ while let Some((pos, ch)) = chars.next() {
+ let pos = pos as i64 + 1;
+
+ match ch {
+ '[' => {
+ // Check if it's a link or image
+ let is_image = chars.peek().map(|&(_, c)| c) == Some('!');
+ if is_image {
+ chars.next(); // Skip '!'
+ }
+ stack.push(('['.to_string(), line_num, pos, is_image));
+ }
+ ']' => {
+ if let Some((last, _l, b, is_image)) = stack.pop() {
+ if last != "[" {
+ return Err(Exit::SyntaxError {
+ content: line.to_string(),
+ reason: format!(
+ "Mismatched bracket: expected '[' but found '{}'",
+ last
+ ),
+ line: line_num,
+ begin: b,
+ end: pos,
+ });
+ }
+ // Check if it's followed by '(' for a link
+ if chars.peek().map(|&(_, c)| c) == Some('(') {
+ chars.next(); // Skip '('
+ // Look for closing ')'
+ let mut found = false;
+ let mut anchor_started = false;
+ let mut anchor = String::new();
+ while let Some((_, c)) = chars.next() {
+ if c == ')' {
+ found = true;
+ break;
+ }
+ if c == '#' && !anchor_started {
+ anchor_started = true;
+ continue;
+ }
+ if anchor_started {
+ anchor.push(c);
+ }
+ }
+ if !found {
+ return Err(Exit::SyntaxError {
+ content: line.to_string(),
+ reason: "Link parentheses not closed".to_string(),
+ line: line_num,
+ begin: pos,
+ end: pos,
+ });
+ }
+ if !anchor.is_empty() {
+ // Remove whitespace from anchor
+ let anchor = anchor.replace(|c: char| c.is_whitespace(), "");
+ anchors.push((anchor, line_num, pos));
+ }
+ } else if !is_image {
+ // It's a reference link, collect the anchor
+ // Check for anchor like [](#anchor)
+ if chars.peek().map(|&(_, c)| c) == Some('(') {
+ chars.next(); // Skip '('
+ if chars.peek().map(|&(_, c)| c) == Some('#') {
+ chars.next(); // Skip '#'
+ let mut anchor = String::new();
+ while let Some(&(_, c)) = chars.peek() {
+ if c == ')' {
+ break;
+ }
+ anchor.push(c);
+ chars.next();
+ }
+ if !anchor.is_empty() {
+ // Remove whitespace from anchor
+ let anchor =
+ anchor.replace(|c: char| c.is_whitespace(), "");
+ anchors.push((anchor, line_num, pos));
+ }
+ }
+ }
+ }
+ } else {
+ return Err(Exit::SyntaxError {
+ content: line.to_string(),
+ reason: "Unmatched ']'".to_string(),
+ line: line_num,
+ begin: pos,
+ end: pos,
+ });
+ }
+ }
+ '(' => {
+ // Check for standalone anchor like (#anchor)
+ if chars.peek().map(|&(_, c)| c) == Some('#') {
+ chars.next(); // Skip '#'
+ let mut anchor = String::new();
+ while let Some(&(_, c)) = chars.peek() {
+ if c == ')' {
+ break;
+ }
+ anchor.push(c);
+ chars.next();
+ }
+ if !anchor.is_empty() {
+ // Remove whitespace from anchor
+ let anchor = anchor.replace(|c: char| c.is_whitespace(), "");
+ anchors.push((anchor, line_num, pos));
+ }
+ } else {
+ stack.push(('('.to_string(), line_num, pos, false));
+ }
+ }
+ ')' => {
+ if let Some((last, _l, b, _)) = stack.pop() {
+ if last != "(" {
+ return Err(Exit::SyntaxError {
+ content: line.to_string(),
+ reason: format!(
+ "Mismatched parenthesis: expected '(' but found '{}'",
+ last
+ ),
+ line: line_num,
+ begin: b,
+ end: pos,
+ });
+ }
+ } else {
+ return Err(Exit::SyntaxError {
+ content: line.to_string(),
+ reason: "Unmatched ')'".to_string(),
+ line: line_num,
+ begin: pos,
+ end: pos,
+ });
+ }
+ }
+ '`' => {
+ // Check for backticks
+ let mut count = 1;
+ while chars.peek().map(|&(_, c)| c) == Some('`') {
+ count += 1;
+ chars.next();
+ }
+ let marker = "`".repeat(count);
+
+ if let Some((last, _, _, _)) = stack.last() {
+ if last == &marker {
+ stack.pop();
+ } else {
+ stack.push((marker.clone(), line_num, pos, false));
+ }
+ } else {
+ stack.push((marker, line_num, pos, false));
+ }
+ }
+ _ => {}
+ }
+ }
+ }
+
+ // Check for unclosed brackets/parentheses
+ if let Some((last, line, begin, _)) = stack.pop() {
+ return Err(Exit::SyntaxError {
+ content: lines[(line - 1) as usize].to_string(),
+ reason: format!("Unclosed '{}'", last),
+ line,
+ begin,
+ end: begin,
+ });
+ }
+
+ // Check if all anchors have corresponding headings
+ for (anchor, line_num, pos) in anchors {
+ // Normalize anchor for comparison: convert to lowercase and filter characters
+ let normalized_anchor = anchor
+ .to_lowercase()
+ .chars()
+ .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_')
+ .collect::<String>();
+
+ if !heading_ids.contains(&normalized_anchor) {
+ return Err(Exit::SyntaxError {
+ content: lines[(line_num - 1) as usize].to_string(),
+ reason: format!("Anchor '#{}' has no corresponding heading", anchor),
+ line: line_num,
+ begin: pos,
+ end: pos + anchor.len() as i64,
+ });
+ }
+ }
+
+ Ok(())
+}