#!/usr/bin/env python3
import os
import re


def is_excluded(name, is_dir):
    """Report whether an entry must be left out of the scan.

    An entry is excluded when its name begins with a dot (hidden files and
    directories) or with an underscore. ``is_dir`` is accepted but does not
    influence the result: files and directories follow the same rules.
    """
    return name.startswith((".", "_"))


# Line-leading Markdown markers, each stripped at most once, in this order.
# The blockquote marker comes first because it is the outermost container: a
# quoted list item such as "> 1. text" only exposes its list number once the
# ">" has been removed.
_LEADING_MARKER_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"^(?:>\s*)+",  # blockquote, possibly nested (">", ">>", "> >")
        r"^#+\s*",  # heading
        r"^-\s*",  # bullet
        r"^\*\s*",  # bullet
        r"^\d+\.\s*",  # ordered-list number
    )
)

# Characters that count as net text: CJK ideographs (U+4E00-U+9FFF and
# U+F900-U+FAFF), ASCII letters and ASCII digits.
_NET_CHAR_PATTERN = re.compile(r"[\u4e00-\u9fff\uf900-\ufaffA-Za-z0-9]")


def count_net_text(file_path):
    """Count the net text characters in a file.

    Rules:
    1. Strip line-leading Markdown markers: blockquote ``>``, heading ``#``,
       bullets ``-`` / ``*`` and ordered-list numbers such as ``1.``.
    2. Ignore all punctuation (ASCII and Chinese) and whitespace.
    3. Count only CJK ideographs (U+4E00-U+9FFF and U+F900-U+FAFF), ASCII
       letters and ASCII digits.

    Args:
        file_path: Path of the file to measure.

    Returns:
        The number of net text characters, or 0 when ``file_path`` is not a
        regular file, cannot be read, or is not valid UTF-8.
    """
    if not os.path.isfile(file_path):
        return 0

    try:
        # "utf-8-sig" drops a leading byte-order mark; left in place it would
        # sit in front of the first line and stop the "^" anchored marker
        # patterns from matching there.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError):
        # Best effort: unreadable or non-UTF-8 files contribute nothing.
        return 0

    net_count = 0
    for line in content.split("\n"):
        for pattern in _LEADING_MARKER_PATTERNS:
            line = pattern.sub("", line)
        net_count += len(_NET_CHAR_PATTERN.findall(line))

    return net_count


def calc_dir_total(dir_path):
    """Sum the net text count of every visible file beneath ``dir_path``.

    Entries rejected by ``is_excluded`` are skipped and sub-directories are
    descended into recursively. A directory whose listing raises
    ``PermissionError`` contributes 0.
    """
    try:
        names = sorted(os.listdir(dir_path))
    except PermissionError:
        return 0

    running = 0
    for name in names:
        child_path = os.path.join(dir_path, name)
        child_is_dir = os.path.isdir(child_path)
        if not is_excluded(name, child_is_dir):
            if os.path.isfile(child_path):
                running += count_net_text(child_path)
            elif child_is_dir:
                running += calc_dir_total(child_path)
    return running


def print_tree(dir_path, prefix=""):
    """Recursively print the visible contents of ``dir_path`` as a tree.

    Every regular file and directory that ``is_excluded`` does not reject is
    printed on its own line together with its net text count; directories are
    followed by their own sub-tree.

    Args:
        dir_path: Directory whose contents are printed.
        prefix: Text printed in front of every line of this level; it carries
            the vertical guide lines of the ancestor levels.

    Returns:
        None. Nothing is printed when listing ``dir_path`` raises
        ``PermissionError``.
    """
    try:
        entries = sorted(os.listdir(dir_path))
    except PermissionError:
        return

    # Keep only the entries that will really be printed. Anything that is
    # neither a regular file nor a directory (for example a broken symlink)
    # produces no line, so it must not take part in the "last entry" decision;
    # otherwise the final visible entry could be drawn with "├── ".
    printable = []
    for entry in entries:
        entry_path = os.path.join(dir_path, entry)
        is_dir = os.path.isdir(entry_path)
        if is_excluded(entry, is_dir):
            continue
        if is_dir or os.path.isfile(entry_path):
            printable.append((entry, entry_path, is_dir))

    last_index = len(printable) - 1
    for idx, (entry, entry_path, is_dir) in enumerate(printable):
        is_last = idx == last_index
        connector = "└── " if is_last else "├── "

        if is_dir:
            sub_total = calc_dir_total(entry_path)
            print(f"{prefix}{connector}{entry} (净文本：{sub_total}字)")
            # Below the last entry no guide line continues, so pad with spaces.
            print_tree(entry_path, prefix + ("    " if is_last else "│   "))
        else:
            cnt = count_net_text(entry_path)
            print(f"{prefix}{connector}{entry}（净文本：{cnt}字）")


def main():
    """Print a net-text report for the parent of this script's directory.

    The report consists of a ``.`` root line, the grand total of net text
    characters, a tree of every visible file and directory with its own net
    text count, and a closing ``N directories, M files`` summary. Entries
    rejected by ``is_excluded`` are left out everywhere.
    """
    # The scanned root is one level above the directory holding this script.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    # Collect the root entries that will be shown and measure each one once,
    # so the same figure feeds both the grand total and the tree line.
    # Entries that are neither a regular file nor a directory (for example
    # broken symlinks) are never printed, so they are dropped here; otherwise
    # one of them could take the "last" slot and leave the final visible entry
    # with a "├── " connector.
    root_items = []  # (name, path, is_dir, net text count)
    for entry in sorted(os.listdir(base_dir)):
        entry_path = os.path.join(base_dir, entry)
        is_dir = os.path.isdir(entry_path)
        if is_excluded(entry, is_dir):
            continue
        if is_dir:
            net = calc_dir_total(entry_path)
            root_items.append((entry, entry_path, True, net))
        elif os.path.isfile(entry_path):
            net = count_net_text(entry_path)
            root_items.append((entry, entry_path, False, net))

    grand_total = sum(net for _, _, _, net in root_items)

    # Count directories and files recursively over the whole visible tree.
    total_dirs = 0
    total_files = 0
    for _, entry_path, is_dir, _ in root_items:
        if not is_dir:
            total_files += 1
            continue
        total_dirs += 1
        for _current_root, dirs, files in os.walk(entry_path):
            # Assigning through dirs[:] prunes the walk in place, so excluded
            # directories are neither counted nor descended into.
            dirs[:] = [d for d in dirs if not is_excluded(d, True)]
            total_dirs += len(dirs)
            total_files += sum(1 for f in files if not is_excluded(f, False))

    # Output
    print(".")
    print(f"（净文本总计：{grand_total}字）")

    last_index = len(root_items) - 1
    for idx, (entry, entry_path, is_dir, net) in enumerate(root_items):
        is_last = idx == last_index
        connector = "└── " if is_last else "├── "

        if is_dir:
            print(f"{connector}{entry} (净文本：{net}字)")
            # Below the last entry no guide line continues, so pad with spaces.
            print_tree(entry_path, "    " if is_last else "│   ")
        else:
            print(f"{connector}{entry}（净文本：{net}字）")

    print(f"\n{total_dirs} directories, {total_files} files")


# Produce the report only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()