aboutsummaryrefslogtreecommitdiff
path: root/mingling_pathf/src/module_pathf.rs
blob: d06be9b9931440ee215c7fa978629c889d6ada7e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use syn::{Item, UseTree};

use crate::error::MinglingPathfinderError;

/// One entry of a module-path mapping: a source file paired with its module path.
///
/// The module path stored here is the *effective* one (e.g., `crate::foo::bar`),
/// i.e. the path under which the file's contents are considered reachable.
#[derive(Debug, Clone)]
pub struct MappingItem {
    /// Location of the source file (relative to the crate root, with a `./` prefix).
    file_path: PathBuf,

    /// Effective module path that belongs to `file_path` (e.g., `"crate::foo::bar"`).
    module_path: String,
}

impl MappingItem {
    /// Borrows the source file path stored in this item
    /// (relative to the crate root, with a `./` prefix).
    pub fn file_path(&self) -> &Path {
        self.file_path.as_path()
    }

    /// Borrows the effective module path stored in this item (e.g., `"crate::foo::bar"`).
    pub fn module_path(&self) -> &str {
        self.module_path.as_str()
    }
}

/// The result of analyzing a crate: which module path each source file ends up under.
///
/// Produced by [`analyze`]. Every source file reached from the crate's entry point
/// gets one [`MappingItem`] holding its effective module path, with `pub use`
/// re-exports already taken into account.
///
/// Consume the mapping through [`IntoIterator`] to obtain the individual items.
#[derive(Debug)]
pub struct ModulePathMapping {
    /// One entry per analyzed source file.
    items: Vec<MappingItem>,
}

/// Analyzes the module structure of a crate and returns the effective module path for each source file.
///
/// `crate_dir` is the crate root directory (i.e., the directory containing `Cargo.toml`).
pub fn analyze(crate_dir: &Path) -> Result<ModulePathMapping, MinglingPathfinderError> {
    let src_dir = crate_dir.join("src");
    if !src_dir.is_dir() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("src/ directory not found at {}", src_dir.display()),
        )
        .into());
    }

    let entry = find_entry_point(&src_dir)?;
    let mut ctx = Context::new(crate_dir);

    // Phase 1: Traverse the module tree, recording direct paths and re-export relationships
    let root_path = "crate".to_string();
    build_direct_paths(&entry, &root_path, &mut ctx)?;

    // Phase 2: Propagate effective paths top-down
    // The effective path of the root file is "crate"
    ctx.effective_paths
        .insert(entry.clone(), "crate".to_string());
    propagate_children(&entry, &mut ctx);

    // Assemble the results
    let items = ctx
        .effective_paths
        .iter()
        .map(|(file, module_path)| MappingItem {
            file_path: ctx.relative_path(file),
            module_path: module_path.clone(),
        })
        .collect();

    Ok(ModulePathMapping { items })
}

/// Scratch state shared by the analysis phases.
///
/// All maps are keyed by the source file path as it was discovered during traversal.
struct Context {
    /// Crate root directory (the directory containing `Cargo.toml`); used to relativize paths.
    crate_dir: PathBuf,
    /// Source file -> module path implied by the `mod` declarations alone (e.g., `"crate::foo::bar"`).
    direct_paths: HashMap<PathBuf, String>,
    /// Source file -> module path after `pub use` re-exports have been applied.
    effective_paths: HashMap<PathBuf, String>,
    /// Source file -> child modules it declares via `mod xxx;`.
    children: HashMap<PathBuf, Vec<ChildModule>>,
    /// Source file -> names of the modules it re-exports via `pub use`.
    reexports: HashMap<PathBuf, Vec<String>>,
    /// Source files that have already been traversed; guards against processing a file twice.
    visited: std::collections::HashSet<PathBuf>,
}

/// A child module declared by a parent file through `mod xxx;`.
#[derive(Clone)]
struct ChildModule {
    /// The module name as written in the `mod` declaration.
    name: String,
    /// The source file the declaration was resolved to.
    file: PathBuf,
}

impl Context {
    fn new(crate_dir: &Path) -> Self {
        Self {
            crate_dir: crate_dir.to_path_buf(),
            direct_paths: HashMap::new(),
            effective_paths: HashMap::new(),
            children: HashMap::new(),
            reexports: HashMap::new(),
            visited: std::collections::HashSet::new(),
        }
    }

    fn relative_path(&self, abs: &Path) -> PathBuf {
        if let Ok(rel) = abs.strip_prefix(&self.crate_dir) {
            PathBuf::from("./").join(rel)
        } else {
            abs.to_path_buf()
        }
    }
}

/// Finds the entry point file of a crate.
///
/// The resolution order is:
/// 1. `src/main.rs`, then `src/lib.rs`.
/// 2. If neither exists, the regular `.rs` file in `src/bin/` whose path sorts first.
///    Directory listings come back in a platform-dependent order, so the smallest
///    path is chosen to keep the result stable across runs and machines.
///
/// # Arguments
/// - `src_dir`: The path to the crate's `src/` directory.
///
/// # Returns
/// The path of the selected entry point, built by joining onto `src_dir`
/// (so it is absolute exactly when `src_dir` is).
///
/// # Errors
/// - Returns [`MinglingPathfinderError::NoEntryPointFound`] if no entry point file is found.
/// - Propagates I/O errors raised while listing `src/bin/`.
fn find_entry_point(src_dir: &Path) -> Result<PathBuf, MinglingPathfinderError> {
    // First, look for src/main.rs or src/lib.rs
    for name in &["main.rs", "lib.rs"] {
        let path = src_dir.join(name);
        if path.is_file() {
            return Ok(path);
        }
    }

    // Next, look for .rs files in src/bin/
    let bin_dir = src_dir.join("bin");
    if bin_dir.is_dir() {
        let mut chosen: Option<PathBuf> = None;
        for entry in std::fs::read_dir(&bin_dir)? {
            let path = entry?.path();

            // Only regular files qualify; a directory that happens to be named `x.rs`
            // cannot be parsed as a source file.
            let is_rust_source = path.extension().is_some_and(|ext| ext == "rs") && path.is_file();
            if !is_rust_source {
                continue;
            }

            // Keep the smallest path seen so far.
            let replaces_current = chosen.as_ref().map_or(true, |best| path < *best);
            if replaces_current {
                chosen = Some(path);
            }
        }

        if let Some(path) = chosen {
            return Ok(path);
        }
    }

    // No entry point found, return an error
    Err(MinglingPathfinderError::NoEntryPointFound)
}

/// Walks the module tree rooted at `file` and records what each file declares.
///
/// This is phase one of the analysis. For every file reached it:
/// - marks the file as visited and returns early if it was seen before,
/// - stores the file's direct module path (e.g., `crate::foo::bar`),
/// - parses the file and gathers, from its top-level items only,
///   - the names of out-of-line child modules (`mod xxx;`),
///   - the module names re-exported through `pub use`,
/// - resolves each child module to its source file and recurses into it.
///
/// # Arguments
/// - `file`: The source file currently being processed
/// - `module_path`: The direct module path of `file` (e.g., `"crate::foo"`)
/// - `ctx`: Receives the paths, child lists and re-export lists that are discovered
///
/// # Errors
/// Fails if a file cannot be read, cannot be parsed
/// ([`MinglingPathfinderError::SynError`]), or if a declared child module cannot be resolved.
fn build_direct_paths(
    file: &Path,
    module_path: &str,
    ctx: &mut Context,
) -> Result<(), MinglingPathfinderError> {
    let this_file = file.to_path_buf();

    // `insert` returns false when the file is already in the set: nothing left to do.
    if !ctx.visited.insert(this_file.clone()) {
        return Ok(());
    }

    ctx.direct_paths
        .insert(this_file.clone(), module_path.to_string());

    // Load the file and turn it into a syntax tree.
    let source = std::fs::read_to_string(file)?;
    let ast = match syn::parse_file(&source) {
        Ok(ast) => ast,
        Err(err) => {
            return Err(MinglingPathfinderError::SynError {
                path: file.to_path_buf(),
                message: err.to_string(),
            });
        }
    };

    // Names of `mod xxx;` declarations, in source order.
    let mut declared_modules: Vec<String> = Vec::new();
    // Module names hoisted by `pub use`.
    let mut hoisted: Vec<String> = Vec::new();

    for item in &ast.items {
        if let Item::Mod(module) = item {
            // Only `mod xxx;` lives in another file; inline `mod xxx { .. }` is skipped.
            if module.semi.is_some() {
                declared_modules.push(module.ident.to_string());
            }
        } else if let Item::Use(import) = item {
            // Private `use` items do not re-export anything.
            if matches!(import.vis, syn::Visibility::Public(_)) {
                collect_reexports(&import.tree, &mut hoisted);
            }
        }
    }

    if !hoisted.is_empty() {
        ctx.reexports.insert(this_file.clone(), hoisted);
    }

    // Descend into every declared child, remembering which file it resolved to.
    let mut resolved_children = Vec::with_capacity(declared_modules.len());
    for name in declared_modules {
        let child_file = resolve_module_file(file, &name)?;
        // Direct path of the child: `parent_path::child_module_name`
        let child_module_path = format!("{module_path}::{name}");
        build_direct_paths(&child_file, &child_module_path, ctx)?;
        resolved_children.push(ChildModule {
            name,
            file: child_file,
        });
    }

    if !resolved_children.is_empty() {
        ctx.children.insert(this_file, resolved_children);
    }

    Ok(())
}

/// Collects re-exported module names from the tree of a `pub use` item.
///
/// The following forms contribute the name `X` to `results`:
/// - `pub use X::*;` (glob re-export of everything inside `X`)
/// - `pub use X;`
/// - `pub use X as Y;` (the original name `X` is recorded, not the alias)
/// - any of the above inside a top-level group, e.g. `pub use {a::*, b::*};`
/// - any of the above behind a leading `self::`, e.g. `pub use self::X::*;`,
///   since `self::` names the current module and does not change what is re-exported
///
/// Deeper paths such as `pub use X::Item;` or `pub use X::{A, B};` are ignored
/// because they do not hoist the whole module.
///
/// # Arguments
/// - `tree`: The `use` tree to inspect
/// - `results`: Receives the collected names; existing entries are kept
fn collect_reexports(tree: &UseTree, results: &mut Vec<String>) {
    match tree {
        // `self::...` is relative to the current module, so look through it.
        UseTree::Path(use_path) if use_path.ident == "self" => {
            collect_reexports(&use_path.tree, results);
        }
        UseTree::Path(use_path) => {
            // Only `X::*` counts; any other continuation selects individual items.
            if matches!(use_path.tree.as_ref(), UseTree::Glob(_)) {
                results.push(use_path.ident.to_string());
            }
        }
        UseTree::Name(use_name) => {
            results.push(use_name.ident.to_string());
        }
        UseTree::Group(group) => {
            for item in &group.items {
                collect_reexports(item, results);
            }
        }
        UseTree::Rename(rename) => {
            results.push(rename.ident.to_string());
        }
        _ => {}
    }
}

/// Resolves the source file path for a child module based on the parent file's path and the child module's name.
///
/// # Resolution Rules
///
/// The module base directory is derived from the parent file's name:
/// - If the parent file is `mod.rs`, `main.rs` or `lib.rs`, the module base directory is the
///   parent file's own directory.
/// - For any other file (e.g., `aaa.rs`), the module base directory is the sibling directory
///   named after the file without its extension (e.g., `aaa/`).
///
/// The following candidate paths are then tried in order:
/// 1. `{module_base}/{module_name}.rs` (single-file module)
/// 2. `{module_base}/{module_name}/mod.rs` (directory module)
///
/// NOTE(review): the crate-root case is detected purely by file name, so a non-root
/// module file that happens to be called `main.rs`/`lib.rs`, or a root located at
/// `src/bin/<name>.rs`, is presumably resolved against the wrong base directory.
/// `#[path = "..."]` attributes are not consulted here either.
///
/// # Arguments
/// - `parent_file`: The path of the parent module source file.
/// - `module_name`: The name of the child module.
///
/// # Returns
/// The path of the first candidate that exists as a regular file. It is built by joining
/// onto `parent_file`'s directory, so it is absolute exactly when `parent_file` is.
///
/// # Errors
/// Returns [`MinglingPathfinderError::ModuleNotFound`] if none of the candidate paths exist,
/// or if `parent_file` has no parent directory to resolve against.
fn resolve_module_file(
    parent_file: &Path,
    module_name: &str,
) -> Result<PathBuf, MinglingPathfinderError> {
    let not_found = || MinglingPathfinderError::ModuleNotFound {
        parent: parent_file.to_path_buf(),
        module_name: module_name.to_string(),
    };

    // A path without a parent (empty or a filesystem root) cannot host child modules;
    // report it as an unresolved module instead of panicking.
    let parent_dir = parent_file.parent().ok_or_else(not_found)?;

    // Rust module resolution rules:
    // - `mod.rs` → module base is the parent directory
    // - `main.rs` / `lib.rs` → crate root, module base is directly src/
    // - `aaa.rs` → module base is `src/aaa/`
    let module_base = match parent_file.file_stem() {
        Some(stem) if stem != "mod" && stem != "main" && stem != "lib" => parent_dir.join(stem),
        _ => parent_dir.to_path_buf(),
    };

    let candidates = [
        module_base.join(format!("{module_name}.rs")),
        module_base.join(module_name).join("mod.rs"),
    ];

    candidates
        .iter()
        .find(|path| path.is_file())
        .cloned()
        .ok_or_else(not_found)
}

/// Starting from the parent file, recursively compute the effective paths of all child files.
///
/// Core rules:
/// - The effective path of a child file = parent effective path + "::" + child module name
/// - However, if the parent file re-exports the child module via `pub use`,
///   then the effective path of the child file = parent effective path
///   (the module name is not appended, i.e., it is hoisted to the parent level)
/// - A child file that already has an effective path is left untouched and not descended
///   into again. This keeps the first assignment and, more importantly, stops the recursion
///   when a `mod` declaration resolves back to a file that is already part of the chain
///   (e.g., `mod lib;` inside `src/lib.rs`), which would otherwise never terminate.
///
/// # Arguments
/// - `parent_file`: The file whose children are processed. If it has no recorded effective
///   path, `"crate"` is used as its path.
/// - `ctx`: Supplies the child and re-export tables and receives the computed effective paths.
fn propagate_children(parent_file: &Path, ctx: &mut Context) {
    // Cloned because `ctx` is mutated (and passed on recursively) while iterating.
    let Some(children) = ctx.children.get(parent_file).cloned() else {
        return;
    };

    let parent_effective = ctx
        .effective_paths
        .get(parent_file)
        .cloned()
        .unwrap_or_else(|| "crate".to_string());

    let reexported = ctx
        .reexports
        .get(parent_file)
        .cloned()
        .unwrap_or_default();

    for child in &children {
        // Already placed: either reached earlier or part of a cycle.
        if ctx.effective_paths.contains_key(&child.file) {
            continue;
        }

        let effective = if reexported.contains(&child.name) {
            // Re-exported: hoist to parent level, do not append module name
            parent_effective.clone()
        } else {
            format!("{}::{}", parent_effective, child.name)
        };

        ctx.effective_paths
            .insert(child.file.clone(), effective);
        propagate_children(&child.file, ctx);
    }
}

/// Consuming iteration over the mapping, yielding each [`MappingItem`] by value.
impl IntoIterator for ModulePathMapping {
    type Item = MappingItem;
    type IntoIter = std::vec::IntoIter<Self::Item>;

    fn into_iter(self) -> Self::IntoIter {
        let Self { items } = self;
        items.into_iter()
    }
}