gwhsss 2 months ago
parent
commit
d6d679380e

+ 0 - 0
doc/doc.txt


+ 3 - 0
src/LinearAlgebra/.idea/.gitignore

@@ -0,0 +1,3 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml

+ 11 - 0
src/LinearAlgebra/.idea/LinearAlgebra.iml

@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+      <excludeFolder url="file://$MODULE_DIR$/my_gptmodel" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.12 (LinearAlgebra)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

+ 6 - 0
src/LinearAlgebra/.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 6 - 0
src/LinearAlgebra/.idea/misc.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.12 (LinearAlgebra)" />
+  </component>
+</project>

+ 8 - 0
src/LinearAlgebra/.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/LinearAlgebra.iml" filepath="$PROJECT_DIR$/.idea/LinearAlgebra.iml" />
+    </modules>
+  </component>
+</project>

+ 6 - 0
src/LinearAlgebra/.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
+  </component>
+</project>

+ 1283 - 0
src/LinearAlgebra/deepleaning_demo.py

@@ -0,0 +1,1283 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+import json
+import os
+import time
+from typing import List, Optional, Tuple, Dict
+from datetime import datetime
+import glob
+
+# ==================== Global configuration ====================
+
+# Model configuration - a larger model for better quality
+MODEL_CONFIG = {
+    'n_layer': 8,  # increased number of layers
+    'n_head': 8,  # increased number of attention heads
+    'n_embd': 256,  # increased embedding dimension
+    'max_seq_len': 512,
+    'dropout': 0.1,
+    'bias': True,
+}
+
+# Training configuration - tuned training parameters
+TRAINING_CONFIG = {
+    'epochs': 2000,
+    'batch_size': 16,
+    'learning_rate': 6e-4,
+    'block_size': 256,
+    'weight_decay': 0.01,
+    'grad_clip': 1.0,
+    'warmup_epochs': 50,
+    'min_loss': 0.05,  # target minimum loss
+}
+
+# Generation configuration - kept global so no user input is required
+GENERATION_CONFIG = {
+    'max_tokens': 900,
+    'temperature': 0.7,
+    'top_k': 40,
+    'top_p': 0.85,
+    'repetition_penalty': 1.1,
+}
+
+# File configuration
+FILE_CONFIG = {
+    'save_dir': "my_gptmodel",
+    'training_data_file': "training_data.txt",
+    'programming_data_file': "programming_data.txt",
+    'model_prefix': "gpt_model",
+    'tokenizer_prefix': "tokenizer",
+}
+
+# Training-data configuration
+TRAINING_DATA_CONFIG = {
+    'data_repetition': 5,  # increased data repetition
+    'min_text_length': 500,
+}
+
+
+# ==================== 模型类定义 ====================
+
+class GPTConfig:
+    """GPT模型配置类"""
+
+    def __init__(
+            self,
+            vocab_size: int = 50257,
+            n_layer: int = MODEL_CONFIG['n_layer'],
+            n_head: int = MODEL_CONFIG['n_head'],
+            n_embd: int = MODEL_CONFIG['n_embd'],
+            max_seq_len: int = MODEL_CONFIG['max_seq_len'],
+            dropout: float = MODEL_CONFIG['dropout'],
+            bias: bool = MODEL_CONFIG['bias'],
+    ):
+        self.vocab_size = vocab_size
+        self.n_layer = n_layer
+        self.n_head = n_head
+        self.n_embd = n_embd
+        self.max_seq_len = max_seq_len
+        self.dropout = dropout
+        self.bias = bias
+
+    def __str__(self):
+        return f"GPTConfig(vocab_size={self.vocab_size}, n_layer={self.n_layer}, n_head={self.n_head}, n_embd={self.n_embd})"
+
+
+class OptimizedCausalSelfAttention(nn.Module):
+    """优化的因果自注意力机制"""
+
+    def __init__(self, config: GPTConfig):
+        super().__init__()
+        assert config.n_embd % config.n_head == 0
+
+        self.n_head = config.n_head
+        self.n_embd = config.n_embd
+        self.head_size = config.n_embd // config.n_head
+
+        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
+        self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
+        self.attn_dropout = nn.Dropout(config.dropout)
+        self.resid_dropout = nn.Dropout(config.dropout)
+
+        # 预计算因果掩码
+        self.register_buffer("bias", torch.tril(torch.ones(config.max_seq_len, config.max_seq_len))
+                             .view(1, 1, config.max_seq_len, config.max_seq_len))
+
+        self.scale = 1.0 / math.sqrt(self.head_size)
+
+    def forward(self, x):
+        B, T, C = x.size()
+
+        qkv = self.c_attn(x)
+        q, k, v = qkv.split(self.n_embd, dim=2)
+
+        q = q.view(B, T, self.n_head, self.head_size).transpose(1, 2)
+        k = k.view(B, T, self.n_head, self.head_size).transpose(1, 2)
+        v = v.view(B, T, self.n_head, self.head_size).transpose(1, 2)
+
+        att = (q @ k.transpose(-2, -1)) * self.scale
+        att = att.masked_fill(self.bias[:, :, :T, :T] == 0, float('-inf'))
+        att = F.softmax(att, dim=-1)
+        att = self.attn_dropout(att)
+
+        y = att @ v
+        y = y.transpose(1, 2).contiguous().view(B, T, C)
+        y = self.resid_dropout(self.c_proj(y))
+        return y
+
+
+class OptimizedMLP(nn.Module):
+    """优化的多层感知机"""
+
+    def __init__(self, config: GPTConfig):
+        super().__init__()
+        intermediate_size = 4 * config.n_embd  # 恢复4倍维度
+        self.c_fc = nn.Linear(config.n_embd, intermediate_size, bias=config.bias)
+        self.gelu = nn.GELU()
+        self.c_proj = nn.Linear(intermediate_size, config.n_embd, bias=config.bias)
+        self.dropout = nn.Dropout(config.dropout)
+
+    def forward(self, x):
+        x = self.c_fc(x)
+        x = self.gelu(x)
+        x = self.c_proj(x)
+        x = self.dropout(x)
+        return x
+
+
+class OptimizedBlock(nn.Module):
+    """优化的Transformer块"""
+
+    def __init__(self, config: GPTConfig):
+        super().__init__()
+        self.ln_1 = nn.LayerNorm(config.n_embd, eps=1e-5)
+        self.attn = OptimizedCausalSelfAttention(config)
+        self.ln_2 = nn.LayerNorm(config.n_embd, eps=1e-5)
+        self.mlp = OptimizedMLP(config)
+
+    def forward(self, x):
+        x = x + self.attn(self.ln_1(x))
+        x = x + self.mlp(self.ln_2(x))
+        return x
+
+
+class OptimizedGPT(nn.Module):
+    """优化的GPT模型"""
+
+    def __init__(self, config: GPTConfig):
+        super().__init__()
+        self.config = config
+
+        self.wte = nn.Embedding(config.vocab_size, config.n_embd)
+        self.wpe = nn.Embedding(config.max_seq_len, config.n_embd)
+        self.drop = nn.Dropout(config.dropout)
+
+        self.blocks = nn.ModuleList([OptimizedBlock(config) for _ in range(config.n_layer)])
+        self.ln_f = nn.LayerNorm(config.n_embd, eps=1e-5)
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+
+        # 权重绑定
+        self.wte.weight = self.lm_head.weight
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, module):
+        """权重初始化"""
+        if isinstance(module, nn.Linear):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+            if module.bias is not None:
+                torch.nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+    def forward(self, idx, targets=None):
+        device = idx.device
+        b, t = idx.size()
+
+        assert t <= self.config.max_seq_len, f"序列长度{t}超过最大长度{self.config.max_seq_len}"
+
+        pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
+
+        tok_emb = self.wte(idx)
+        pos_emb = self.wpe(pos)
+        x = self.drop(tok_emb + pos_emb)
+
+        for block in self.blocks:
+            x = block(x)
+
+        x = self.ln_f(x)
+
+        if targets is not None:
+            logits = self.lm_head(x)
+            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
+        else:
+            logits = self.lm_head(x)
+            loss = None
+
+        return logits, loss
+
+    def generate(self, idx, max_new_tokens=None, temperature=None, top_k=None, top_p=None, repetition_penalty=None):
+        """生成文本 - 增强版"""
+        max_new_tokens = max_new_tokens or GENERATION_CONFIG['max_tokens']
+        temperature = temperature or GENERATION_CONFIG['temperature']
+        top_k = top_k if top_k is not None else GENERATION_CONFIG['top_k']
+        top_p = top_p if top_p is not None else GENERATION_CONFIG['top_p']
+        repetition_penalty = repetition_penalty or GENERATION_CONFIG['repetition_penalty']
+
+        generated_sequence = []
+
+        for _ in range(max_new_tokens):
+            idx_cond = idx if idx.size(1) <= self.config.max_seq_len else idx[:, -self.config.max_seq_len:]
+
+            logits, _ = self(idx_cond)
+            logits = logits[:, -1, :]
+
+            # 重复惩罚
+            if repetition_penalty != 1.0:
+                for token in set(generated_sequence):
+                    logits[0, token] /= repetition_penalty
+
+            # 温度调节
+            if temperature != 1.0:
+                logits = logits / temperature
+
+            # Top-K 过滤
+            if top_k is not None and top_k > 0:
+                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                logits[logits < v[:, -1].unsqueeze(-1)] = -float('Inf')
+
+            # Top-P (核采样) 过滤
+            if top_p is not None and top_p < 1.0:
+                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
+
+                # 移除累积概率超过top_p的token
+                sorted_indices_to_remove = cumulative_probs > top_p
+                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
+                sorted_indices_to_remove[..., 0] = 0
+
+                indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+                logits[indices_to_remove] = -float('Inf')
+
+            probs = F.softmax(logits, dim=-1)
+
+            # 检查是否有有效的概率
+            if torch.all(probs == 0):
+                break
+
+            idx_next = torch.multinomial(probs, num_samples=1)
+            generated_sequence.append(idx_next.item())
+            idx = torch.cat((idx, idx_next), dim=1)
+
+        return idx
+
+
+class CharTokenizer:
+    """增强版分词器"""
+
+    def __init__(self, text: str = None, stoi: Dict = None):
+        if stoi is not None:
+            self.stoi = self._normalize_stoi(stoi)
+        elif text is not None:
+            chars = sorted(list(set(text)))
+            self.stoi = {ch: i for i, ch in enumerate(chars)}
+        else:
+            raise ValueError("必须提供text或stoi参数")
+
+        self.itos = {v: k for k, v in self.stoi.items()}
+        self.vocab_size = len(self.stoi)
+        self.unknown_token = '?'
+
+    def _normalize_stoi(self, stoi_dict: Dict) -> Dict:
+        """标准化stoi字典"""
+        normalized = {}
+        for k, v in stoi_dict.items():
+            if isinstance(k, str) and k.isdigit():
+                char_key = chr(int(k))
+                normalized[char_key] = int(v)
+            elif isinstance(k, int):
+                normalized[chr(k)] = int(v)
+            else:
+                normalized[k] = int(v)
+        return normalized
+
+    def encode(self, text: str) -> List[int]:
+        return [self.stoi.get(ch, 0) for ch in text]
+
+    def decode(self, indices: List[int]) -> str:
+        return ''.join([self.itos.get(i, self.unknown_token) for i in indices])
+
+    def save(self, filepath: str):
+        """保存分词器"""
+        os.makedirs(os.path.dirname(filepath), exist_ok=True)
+        with open(filepath, 'w', encoding='utf-8') as f:
+            json.dump(self.stoi, f, ensure_ascii=False, indent=2)
+
+    @classmethod
+    def load(cls, filepath: str):
+        """加载分词器"""
+        with open(filepath, 'r', encoding='utf-8') as f:
+            stoi = json.load(f)
+        tokenizer = cls(stoi=stoi)
+        return tokenizer
+
+    def __str__(self):
+        return f"CharTokenizer(vocab_size={self.vocab_size})"
+
+
+# ==================== 自动数据收集 ====================
+
+def collect_training_data():
+    """Assemble the training corpus and return it as one string.
+
+    The corpus always contains the built-in programming sample below. Any of
+    the listed data files found under ``FILE_CONFIG['save_dir']`` are appended
+    when their stripped content is longer than
+    ``TRAINING_DATA_CONFIG['min_text_length']``. If the collected text is still
+    shorter than 10,000 characters, a second built-in sample is added. The
+    sources are joined with newlines and the result is repeated
+    ``TRAINING_DATA_CONFIG['data_repetition']`` times.
+    """
+    data_sources = []
+
+    # 1. Built-in programming sample (the string content is training data and
+    #    is used verbatim)
+    programming_data = """
+# Python完整知识库
+def calculate_factorial(n):
+    if n == 0 or n == 1:
+        return 1
+    else:
+        return n * calculate_factorial(n-1)
+
+class Student:
+    def __init__(self, name, age, grade):
+        self.name = name
+        self.age = age
+        self.grade = grade
+        self.subjects = []
+
+    def add_subject(self, subject):
+        self.subjects.append(subject)
+
+    def get_average(self, scores):
+        if not scores:
+            return 0
+        return sum(scores) / len(scores)
+
+def read_file_safely(filename):
+    try:
+        with open(filename, 'r', encoding='utf-8') as file:
+            return file.read()
+    except FileNotFoundError:
+        return "文件不存在"
+
+# 数据结构和算法
+def binary_search(arr, target):
+    left, right = 0, len(arr) - 1
+    while left <= right:
+        mid = (left + right) // 2
+        if arr[mid] == target:
+            return mid
+        elif arr[mid] < target:
+            left = mid + 1
+        else:
+            right = mid - 1
+    return -1
+
+# 面向对象编程示例
+class Animal:
+    def __init__(self, name, species):
+        self.name = name
+        self.species = species
+
+    def speak(self):
+        return "动物发出声音"
+
+class Dog(Animal):
+    def __init__(self, name, breed):
+        super().__init__(name, "犬科")
+        self.breed = breed
+
+    def speak(self):
+        return "汪汪!"
+
+# 文件操作类
+class FileProcessor:
+    def __init__(self, filename):
+        self.filename = filename
+
+    def read_content(self):
+        try:
+            with open(self.filename, 'r', encoding='utf-8') as f:
+                return f.read()
+        except Exception as e:
+            return f"错误: {e}"
+
+    def write_content(self, content):
+        try:
+            with open(self.filename, 'w', encoding='utf-8') as f:
+                f.write(content)
+            return True
+        except Exception as e:
+            print(f"写入错误: {e}")
+            return False
+
+# 数学计算函数
+import math
+def quadratic_equation(a, b, c):
+    discriminant = b**2 - 4*a*c
+    if discriminant < 0:
+        return "无实数解"
+    elif discriminant == 0:
+        x = -b / (2*a)
+        return f"唯一解: x = {x}"
+    else:
+        x1 = (-b + math.sqrt(discriminant)) / (2*a)
+        x2 = (-b - math.sqrt(discriminant)) / (2*a)
+        return f"两个解: x1 = {x1}, x2 = {x2}"
+
+# 字符串处理工具
+def process_text(text):
+    lines = text.split('\\n')
+    processed_lines = []
+    for line in lines:
+        line = line.strip()
+        if line and not line.startswith('#'):
+            processed_lines.append(line)
+    return '\\n'.join(processed_lines)
+
+# 列表操作示例
+def list_operations():
+    numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    squares = [x**2 for x in numbers]
+    even_squares = [x**2 for x in numbers if x % 2 == 0]
+    return squares, even_squares
+
+# 字典操作示例
+def dict_operations():
+    student = {
+        "name": "张三",
+        "age": 20,
+        "major": "计算机科学",
+        "grades": {"数学": 90, "英语": 85, "编程": 95}
+    }
+    return student
+
+# 异常处理示例
+def safe_division(a, b):
+    try:
+        result = a / b
+        return result
+    except ZeroDivisionError:
+        return "除数不能为零"
+    except TypeError:
+        return "输入必须是数字"
+
+# 装饰器示例
+def timer(func):
+    def wrapper(*args, **kwargs):
+        import time
+        start = time.time()
+        result = func(*args, **kwargs)
+        end = time.time()
+        print(f"函数 {func.__name__} 执行时间: {end-start:.2f}秒")
+        return result
+    return wrapper
+
+@timer
+def expensive_operation(n):
+    import time
+    time.sleep(0.1)
+    return sum(range(n))
+
+# 生成器示例
+def fibonacci_generator(n):
+    a, b = 0, 1
+    for _ in range(n):
+        yield a
+        a, b = b, a + b
+
+# 上下文管理器
+class DatabaseConnection:
+    def __init__(self, db_name):
+        self.db_name = db_name
+
+    def __enter__(self):
+        print(f"连接数据库: {self.db_name}")
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        print("关闭数据库连接")
+
+    def query(self, sql):
+        print(f"执行查询: {sql}")
+        return [{"id": 1, "name": "示例数据"}]
+
+# 排序算法
+def bubble_sort(arr):
+    n = len(arr)
+    for i in range(n):
+        for j in range(0, n-i-1):
+            if arr[j] > arr[j+1]:
+                arr[j], arr[j+1] = arr[j+1], arr[j]
+    return arr
+
+def quick_sort(arr):
+    if len(arr) <= 1:
+        return arr
+    pivot = arr[len(arr)//2]
+    left = [x for x in arr if x < pivot]
+    middle = [x for x in arr if x == pivot]
+    right = [x for x in arr if x > pivot]
+    return quick_sort(left) + middle + quick_sort(right)
+
+# 数据结构
+class LinkedList:
+    class Node:
+        def __init__(self, data):
+            self.data = data
+            self.next = None
+
+    def __init__(self):
+        self.head = None
+
+    def append(self, data):
+        new_node = self.Node(data)
+        if not self.head:
+            self.head = new_node
+            return
+        current = self.head
+        while current.next:
+            current = current.next
+        current.next = new_node
+
+    def display(self):
+        elements = []
+        current = self.head
+        while current:
+            elements.append(current.data)
+            current = current.next
+        return elements
+
+class Stack:
+    def __init__(self):
+        self.items = []
+
+    def push(self, item):
+        self.items.append(item)
+
+    def pop(self):
+        if not self.is_empty():
+            return self.items.pop()
+        return None
+
+    def is_empty(self):
+        return len(self.items) == 0
+
+    def peek(self):
+        if not self.is_empty():
+            return self.items[-1]
+        return None
+
+# 主程序入口
+if __name__ == "__main__":
+    # 测试各种功能
+    print("测试开始...")
+
+    # 数学函数测试
+    result = calculate_factorial(5)
+    print(f"5的阶乘: {result}")
+
+    # 学生类测试
+    student = Student("李四", 20, "计算机科学")
+    student.add_subject("Python编程")
+    student.add_subject("数据结构")
+    print(f"学生: {student.name}, 科目: {student.subjects}")
+
+    # 排序测试
+    test_arr = [64, 34, 25, 12, 22, 11, 90]
+    sorted_arr = quick_sort(test_arr.copy())
+    print(f"排序前: {test_arr}")
+    print(f"排序后: {sorted_arr}")
+
+    print("所有测试完成!")
+"""
+    data_sources.append(programming_data)
+
+    # 2. Try to read external data files from the save directory
+    data_files = [
+        "training_data.txt",
+        "programming_data.txt",
+        "code_data.txt",
+        "python_code.txt"
+    ]
+
+    for data_file in data_files:
+        file_path = os.path.join(FILE_CONFIG['save_dir'], data_file)
+        if os.path.exists(file_path):
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read().strip()
+                    # Files at or below the minimum length are ignored.
+                    if len(content) > TRAINING_DATA_CONFIG['min_text_length']:
+                        data_sources.append(content)
+                        print(f"✅ 加载数据文件: {data_file} ({len(content)} 字符)")
+            except Exception as e:
+                # Best effort: an unreadable file is reported and skipped.
+                print(f"⚠ 读取数据文件 {data_file} 时出错: {e}")
+
+    # 3. If there is not enough data, add the extended default sample
+    if len(''.join(data_sources)) < 10000:  # total data below 10k characters
+        extended_data = """
+# 更多Python编程示例
+
+# 网络请求示例
+import requests
+def fetch_url(url):
+    try:
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        return response.text
+    except requests.RequestException as e:
+        return f"请求失败: {e}"
+
+# 数据处理示例
+import json
+def process_json_data(json_string):
+    try:
+        data = json.loads(json_string)
+        return data
+    except json.JSONDecodeError as e:
+        return f"JSON解析错误: {e}"
+
+def save_to_json(data, filename):
+    try:
+        with open(filename, 'w', encoding='utf-8') as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+        return True
+    except Exception as e:
+        print(f"保存JSON失败: {e}")
+        return False
+
+# 日期时间处理
+from datetime import datetime, timedelta
+def date_operations():
+    now = datetime.now()
+    tomorrow = now + timedelta(days=1)
+    last_week = now - timedelta(weeks=1)
+
+    return {
+        "now": now.strftime("%Y-%m-%d %H:%M:%S"),
+        "tomorrow": tomorrow.strftime("%Y-%m-%d"),
+        "last_week": last_week.strftime("%Y-%m-%d")
+    }
+
+# 正则表达式示例
+import re
+def extract_emails(text):
+    pattern = r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b'
+    return re.findall(pattern, text)
+
+def validate_phone(phone):
+    pattern = r'^1[3-9]\\d{9}$'
+    return bool(re.match(pattern, phone))
+
+# 多线程示例
+import threading
+import time
+
+class Counter:
+    def __init__(self):
+        self.value = 0
+        self.lock = threading.Lock()
+
+    def increment(self):
+        with self.lock:
+            self.value += 1
+
+def worker(counter, iterations):
+    for _ in range(iterations):
+        counter.increment()
+
+# 单元测试示例
+import unittest
+class TestMathFunctions(unittest.TestCase):
+    def test_factorial(self):
+        self.assertEqual(calculate_factorial(5), 120)
+        self.assertEqual(calculate_factorial(0), 1)
+
+    def test_binary_search(self):
+        arr = [1, 3, 5, 7, 9]
+        self.assertEqual(binary_search(arr, 5), 2)
+        self.assertEqual(binary_search(arr, 2), -1)
+
+# 主程序入口
+if __name__ == "__main__":
+    # 测试各种功能
+    print("测试开始...")
+
+    # 数学函数测试
+    result = calculate_factorial(5)
+    print(f"5的阶乘: {result}")
+
+    # 学生类测试
+    student = Student("李四", 20, "计算机科学")
+    student.add_subject("Python编程")
+    student.add_subject("数据结构")
+    print(f"学生: {student.name}, 科目: {student.subjects}")
+
+    # 排序测试
+    test_arr = [64, 34, 25, 12, 22, 11, 90]
+    sorted_arr = quick_sort(test_arr.copy())
+    print(f"排序前: {test_arr}")
+    print(f"排序后: {sorted_arr}")
+
+    print("所有测试完成!")
+"""
+        data_sources.append(extended_data)
+
+    # Merge all data sources
+    combined_data = '\n'.join(data_sources)
+
+    # Repeat the data to obtain more training samples
+    combined_data = combined_data * TRAINING_DATA_CONFIG['data_repetition']
+
+    print(f"📊 总训练数据: {len(combined_data):,} 字符")
+    return combined_data
+
+
+# ==================== 优化的训练函数 ====================
+
+class EnhancedTrainingMonitor:
+    """增强的训练监控器"""
+
+    def __init__(self, save_dir: str = FILE_CONFIG['save_dir']):
+        self.losses = []
+        self.start_time = time.time()
+        self.save_dir = save_dir
+        os.makedirs(save_dir, exist_ok=True)
+        self.best_loss = float('inf')
+        self.patience = 100
+        self.checkpoint_frequency = 50  # 每50轮保存一次
+
+    def update(self, loss, epoch, model=None, tokenizer=None):
+        self.losses.append(loss)
+        elapsed = time.time() - self.start_time
+
+        # 进度显示
+        if epoch % 20 == 0 or epoch < 10 or epoch == TRAINING_CONFIG['epochs'] - 1:
+            epochs_per_sec = (epoch + 1) / elapsed
+            eta = (TRAINING_CONFIG['epochs'] - epoch - 1) / epochs_per_sec if epochs_per_sec > 0 else 0
+
+            print(f"Epoch {epoch:4d} | Loss: {loss:.4f} | "
+                  f"Speed: {epochs_per_sec:.2f} epoch/s | ETA: {eta:.0f}s")
+
+            # 定期保存检查点
+            if model and epoch % self.checkpoint_frequency == 0 and epoch > 0:
+                self.save_checkpoint(model, tokenizer, epoch, loss)
+
+            # 保存最佳模型
+            if loss < self.best_loss:
+                self.best_loss = loss
+                if model and epoch > 100:  # 100轮后才开始保存最佳模型
+                    self.save_best_model(model, tokenizer, epoch, loss)
+
+    def save_checkpoint(self, model, tokenizer, epoch, loss):
+        """保存检查点"""
+        checkpoint = {
+            'epoch': epoch,
+            'model_state_dict': model.state_dict(),
+            'loss': loss,
+            'config': model.config,
+            'tokenizer': tokenizer.stoi,
+            'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S")
+        }
+        path = os.path.join(self.save_dir, f"checkpoint_epoch_{epoch}.pth")
+        torch.save(checkpoint, path)
+        print(f"💾 检查点已保存: {path}")
+
+    def save_best_model(self, model, tokenizer, epoch, loss):
+        """保存最佳模型"""
+        model_data = {
+            'model_state_dict': model.state_dict(),
+            'config': model.config,
+            'tokenizer': tokenizer.stoi,
+            'training_losses': self.losses,
+            'epoch': epoch,
+            'loss': loss,
+            'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S"),
+            'global_configs': {
+                'MODEL_CONFIG': MODEL_CONFIG,
+                'TRAINING_CONFIG': TRAINING_CONFIG,
+                'GENERATION_CONFIG': GENERATION_CONFIG
+            }
+        }
+
+        model_path = os.path.join(self.save_dir, f"{FILE_CONFIG['model_prefix']}_best.pth")
+        torch.save(model_data, model_path)
+        print(f"🏆 最佳模型已保存: {model_path} (loss: {loss:.4f})")
+
+    def plot_loss(self):
+        """绘制损失曲线"""
+        try:
+            import matplotlib.pyplot as plt
+            plt.figure(figsize=(12, 6))
+            plt.plot(self.losses)
+            plt.title('Training Loss Progress')
+            plt.xlabel('Epoch')
+            plt.ylabel('Loss')
+            plt.grid(True, alpha=0.3)
+            loss_path = os.path.join(self.save_dir, 'training_loss.png')
+            plt.savefig(loss_path, dpi=150, bbox_inches='tight')
+            print(f"✓ 损失曲线已保存: {loss_path}")
+        except ImportError:
+            print("⚠ 未安装matplotlib,无法绘制损失曲线")
+
+
+def get_improved_learning_rate(epoch, warmup_epochs=20):
+    """改进的学习率调度"""
+    if epoch < warmup_epochs:
+        # 线性预热
+        return TRAINING_CONFIG['learning_rate'] * (epoch + 1) / warmup_epochs
+    else:
+        # 余弦退火
+        progress = (epoch - warmup_epochs) / (TRAINING_CONFIG['epochs'] - warmup_epochs)
+        return TRAINING_CONFIG['learning_rate'] * 0.5 * (1 + math.cos(math.pi * progress))
+
+
+def improved_train_gpt(model: OptimizedGPT, X: torch.Tensor, Y: torch.Tensor, tokenizer: CharTokenizer):
+    """改进的训练函数"""
+    model.train()
+
+    optimizer = torch.optim.AdamW(
+        model.parameters(),
+        lr=TRAINING_CONFIG['learning_rate'],
+        weight_decay=TRAINING_CONFIG['weight_decay'],
+        betas=(0.9, 0.95)
+    )
+
+    monitor = EnhancedTrainingMonitor(FILE_CONFIG['save_dir'])
+
+    print(f"🚀 开始训练GPT模型")
+    print(f"📊 总轮数: {TRAINING_CONFIG['epochs']}")
+    print(f"🔢 模型参数: {sum(p.numel() for p in model.parameters()):,}")
+    print(f"📚 训练样本: {len(X):,}")
+
+    # 设备设置
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print(f"💻 使用设备: {device}")
+    model.to(device)
+    X, Y = X.to(device), Y.to(device)
+
+    best_loss = float('inf')
+    patience_counter = 0
+
+    for epoch in range(TRAINING_CONFIG['epochs']):
+        # 动态学习率
+        lr = get_improved_learning_rate(epoch, TRAINING_CONFIG['warmup_epochs'])
+        for param_group in optimizer.param_groups:
+            param_group['lr'] = lr
+
+        # 训练步骤
+        optimizer.zero_grad()
+
+        indices = torch.randint(0, len(X), (TRAINING_CONFIG['batch_size'],))
+        x_batch = X[indices]
+        y_batch = Y[indices]
+
+        logits, loss = model(x_batch, y_batch)
+        loss.backward()
+
+        # 梯度裁剪
+        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=TRAINING_CONFIG['grad_clip'])
+        optimizer.step()
+
+        monitor.update(loss.item(), epoch, model, tokenizer)
+
+        # 早停检查
+        if loss.item() < best_loss:
+            best_loss = loss.item()
+            patience_counter = 0
+        else:
+            patience_counter += 1
+
+        # 早停条件
+        if patience_counter >= monitor.patience and epoch > 300:
+            print(f"🛑 早停触发,第{epoch}轮")
+            break
+
+        # 损失足够小提前停止
+        if loss.item() < TRAINING_CONFIG['min_loss'] and epoch > 200:
+            print(f"✅ 训练完成,损失已达目标值 {loss.item():.4f}")
+            break
+
+    print("🎉 训练完成!")
+    monitor.plot_loss()
+    return monitor.losses
+
+
+# ==================== 工具函数 ====================
+
+def create_improved_sample_dataset(text: str, block_size: int = None) -> Tuple[
+    torch.Tensor, torch.Tensor, 'CharTokenizer']:
+    """创建改进的训练数据集"""
+    block_size = block_size or TRAINING_CONFIG['block_size']
+
+    # 文本预处理
+    lines = text.split('\n')
+    cleaned_lines = []
+    for line in lines:
+        line = line.strip()
+        if line and not line.startswith('#'):  # 移除空行和注释
+            cleaned_lines.append(line)
+    text = '\n'.join(cleaned_lines)
+
+    # 数据增强
+    text = text * TRAINING_DATA_CONFIG['data_repetition']
+
+    tokenizer = CharTokenizer(text)
+
+    if len(text) < block_size + 1:
+        print("⚠ 文本较短,使用重叠采样")
+        data = torch.tensor(tokenizer.encode(text), dtype=torch.long)
+        while len(data) < block_size + 1000:
+            data = torch.cat([data, data])
+        data = data[:block_size + 2000]
+    else:
+        data = torch.tensor(tokenizer.encode(text), dtype=torch.long)
+
+    n = len(data) - block_size
+    if n <= 0:
+        raise ValueError("无法创建训练样本")
+
+    # 创建训练样本
+    X = torch.stack([data[i:i + block_size] for i in range(0, n, 1)])  # 步长为1获取更多样本
+    Y = torch.stack([data[i + 1:i + block_size + 1] for i in range(0, n, 1)])
+
+    print(f"✅ 创建了 {len(X):,} 个训练样本")
+    print(f"🔤 词汇表大小: {tokenizer.vocab_size}")
+    return X, Y, tokenizer
+
+
+def format_generated_text(text: str, start_text: str) -> str:
+    """格式化生成的文本"""
+    # 移除起始文本
+    if text.startswith(start_text):
+        generated_part = text[len(start_text):]
+    else:
+        generated_part = text
+
+    # 清理文本
+    lines = generated_part.split('\n')
+    cleaned_lines = []
+
+    for line in lines:
+        line = line.strip()
+        if line:
+            # 简单的代码格式检测
+            if any(keyword in line for keyword in ['def ', 'class ', 'import ', 'from ', 'if ', 'for ', 'while ']):
+                cleaned_lines.append(line)
+            elif line.startswith('#') or line.startswith('"""') or line.startswith("'''"):
+                cleaned_lines.append(line)
+            elif '=' in line or ':' in line or line.endswith(':'):
+                cleaned_lines.append(line)
+            elif len(line) > 10:  # 保留较长的文本行
+                cleaned_lines.append(line)
+
+    return '\n'.join(cleaned_lines)
+
+
+def interactive_generation(model: OptimizedGPT, tokenizer: CharTokenizer):
+    """改进的交互式文本生成 - 使用全局参数"""
+    print("\n" + "=" * 60)
+    print("🤖 进入交互式生成模式")
+    print("💡 提示: 输入Python代码片段或自然语言描述")
+    print("⏹️  退出: 输入 'quit', 'exit', 或 '退出'")
+    print("🔧 使用全局生成参数:")
+    print(f"   🌡️  温度: {GENERATION_CONFIG['temperature']}")
+    print(f"   🔝 Top-K: {GENERATION_CONFIG['top_k']}")
+    print(f"   📏 生成长度: {GENERATION_CONFIG['max_tokens']}")
+    print("=" * 60)
+
+    model.eval()
+
+    while True:
+        try:
+            print("\n" + "-" * 40)
+            user_input = input("🎯 请输入起始文本: ").strip()
+
+            if user_input.lower() in ['quit', 'exit', '退出']:
+                break
+
+            if not user_input:
+                print("⚠ 输入不能为空,请重新输入。")
+                continue
+
+            print(f"⚡ 生成中...", end='', flush=True)
+            start_time = time.time()
+
+            # 使用全局配置参数
+            start_tokens = torch.tensor([tokenizer.encode(user_input)], dtype=torch.long)
+
+            with torch.no_grad():
+                generated_tokens = model.generate(
+                    start_tokens,
+                    max_new_tokens=GENERATION_CONFIG['max_tokens'],
+                    temperature=GENERATION_CONFIG['temperature'],
+                    top_k=GENERATION_CONFIG['top_k'],
+                    top_p=GENERATION_CONFIG['top_p'],
+                    repetition_penalty=GENERATION_CONFIG['repetition_penalty']
+                )
+
+            elapsed = time.time() - start_time
+            print(f"完成! (耗时: {elapsed:.2f}s)")
+
+            # 解码和格式化
+            full_text = tokenizer.decode(generated_tokens[0].tolist())
+            formatted_text = format_generated_text(full_text, user_input)
+
+            print(f"\n📊 生成结果:")
+            print("=" * 50)
+            print(f"🎯 起始: {user_input}")
+            print("-" * 50)
+            if formatted_text:
+                print(formatted_text)
+            else:
+                # 如果格式化后为空,显示原始生成文本(截断)
+                display_text = full_text[len(user_input):]
+                if len(display_text) > 300:
+                    display_text = display_text[:300] + "..."
+                print(display_text)
+            print("=" * 50)
+            print(f"📏 总长度: {len(full_text)} 字符")
+
+        except KeyboardInterrupt:
+            print("\n\n🛑 用户中断,退出交互模式")
+            break
+        except Exception as e:
+            print(f"❌ 生成时出错: {e}")
+
+
+def get_available_models(save_dir: str = None) -> List[Tuple[str, str]]:
+    """获取所有可用的模型文件"""
+    save_dir = save_dir or FILE_CONFIG['save_dir']
+    if not os.path.exists(save_dir):
+        return []
+
+    model_files = [f for f in os.listdir(save_dir) if f.endswith('.pth') and 'checkpoint' not in f]
+    if not model_files:
+        return []
+
+    model_files.sort(key=lambda x: os.path.getmtime(os.path.join(save_dir, x)), reverse=True)
+    return [(f, os.path.join(save_dir, f)) for f in model_files]
+
+
+def select_model_interactively() -> str:
+    """交互式选择模型文件"""
+    available_models = get_available_models()
+
+    if not available_models:
+        print("❌ 在 my_gptmodel 目录中未找到任何模型文件")
+        return None
+
+    print("\n📂 可用的模型文件:")
+    print("-" * 60)
+    for i, (filename, full_path) in enumerate(available_models, 1):
+        mtime = os.path.getmtime(full_path)
+        mtime_str = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
+        size = os.path.getsize(full_path) / 1024 / 1024  # MB
+        print(f"{i:2d}. {filename}")
+        print(f"    修改时间: {mtime_str} | 大小: {size:.1f}MB")
+
+    while True:
+        try:
+            choice = input(f"\n🎲 请选择模型文件 (1-{len(available_models)}): ").strip()
+            if not choice:
+                return available_models[0][1]
+
+            index = int(choice) - 1
+            if 0 <= index < len(available_models):
+                return available_models[index][1]
+            else:
+                print(f"⚠ 请输入 1-{len(available_models)} 之间的数字")
+        except ValueError:
+            print("⚠ 请输入有效的数字")
+
+
+class AdvancedGPT(OptimizedGPT):
+    """增强版GPT"""
+
+    def __init__(self, config: GPTConfig):
+        super().__init__(config)
+
+    @classmethod
+    def from_pretrained(cls, model_path: str, weights_only=False):
+        """从预训练文件加载模型"""
+        try:
+            checkpoint = torch.load(model_path, map_location='cpu', weights_only=weights_only)
+            config = checkpoint['config']
+            model = cls(config)
+            model.load_state_dict(checkpoint['model_state_dict'])
+            global_configs = checkpoint.get('global_configs', {})
+            return model, checkpoint.get('tokenizer', None), checkpoint.get('training_losses', []), global_configs
+        except Exception as e:
+            print(f"❌ 加载模型时出错: {e}")
+            raise
+
+
+def load_and_test_model(model_path: str = None):
+    """加载并测试模型"""
+    try:
+        if model_path is None:
+            model_path = select_model_interactively()
+            if model_path is None:
+                return None, None
+
+        print(f"📥 加载模型: {model_path}")
+        model, tokenizer_dict, losses, global_configs = AdvancedGPT.from_pretrained(model_path, weights_only=False)
+
+        tokenizer = CharTokenizer(stoi=tokenizer_dict)
+        print(f"✅ 模型加载成功")
+        print(f"🔤 词汇表大小: {tokenizer.vocab_size}")
+
+        # 改进的测试生成
+        test_prompts = [
+            "def calculate",
+            "class Student",
+            "import pandas",
+            "for i in range",
+            "# 单元测试",
+            "def read_file"
+        ]
+
+        print(f"\n🧪 模型测试生成:")
+        print("-" * 40)
+
+        for i, prompt in enumerate(test_prompts[:3], 1):  # 只测试前3个
+            print(f"\n测试 {i}: '{prompt}'")
+            start_tokens = torch.tensor([tokenizer.encode(prompt)], dtype=torch.long)
+
+            with torch.no_grad():
+                generated = model.generate(start_tokens, max_new_tokens=100)
+
+            result = tokenizer.decode(generated[0].tolist())
+            formatted = format_generated_text(result, prompt)
+            if formatted:
+                print(formatted[:200] + "..." if len(formatted) > 200 else formatted)
+            else:
+                print("⚠ 生成结果为空")
+
+        return model, tokenizer
+
+    except Exception as e:
+        print(f"❌ 加载模型失败: {e}")
+        import traceback
+        traceback.print_exc()
+        return None, None
+
+
+def create_programming_data_file():
+    """创建编程数据文件"""
+    programming_data_path = os.path.join(FILE_CONFIG['save_dir'], FILE_CONFIG['programming_data_file'])
+
+    if not os.path.exists(programming_data_path):
+        print(f"📝 创建编程数据文件: {programming_data_path}")
+        programming_data = collect_training_data()
+
+        try:
+            with open(programming_data_path, 'w', encoding='utf-8') as f:
+                f.write(programming_data.strip())
+            print(f"✅ 编程数据文件已创建: {programming_data_path}")
+            print("💡 您可以将自己的Python代码数据添加到这个文件中")
+        except Exception as e:
+            print(f"❌ 创建编程数据文件时出错: {e}")
+
+
+def main():
+    """主函数"""
+    try:
+        # 创建保存目录
+        save_dir = FILE_CONFIG['save_dir']
+        os.makedirs(save_dir, exist_ok=True)
+
+        print("🤖 GPT语言模型训练与生成系统")
+        print("=" * 60)
+        print(f"📁 文件保存目录: {save_dir}")
+        print(f"⚙️  模型配置: {MODEL_CONFIG}")
+        print(f"⚙️  训练配置: {TRAINING_CONFIG}")
+        print(f"⚙️  生成配置: {GENERATION_CONFIG}")
+
+        # 1. 自动收集训练数据
+        print("\n1. 📚 收集训练数据...")
+        training_data = collect_training_data()
+
+        # 2. 创建数据集
+        print("\n2. 🗂️  创建数据集...")
+        X, Y, tokenizer = create_improved_sample_dataset(training_data)
+        print(f"   ✅ 数据集: {len(X):,} 样本")
+        print(f"   🔤 词汇表: {tokenizer.vocab_size} 字符")
+
+        # 3. 创建模型
+        print("\n3. 🧠 创建模型...")
+        config = GPTConfig(vocab_size=tokenizer.vocab_size)
+        print(f"   {config}")
+
+        model = OptimizedGPT(config)
+        print(f"   ✅ 参数数量: {sum(p.numel() for p in model.parameters()):,}")
+
+        # 4. 训练模型
+        print("\n4. 🏋️  训练模型...")
+        losses = improved_train_gpt(model, X, Y, tokenizer)
+
+        # 5. 保存最终模型
+        print("\n5. 💾 保存最终模型...")
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        model_path = os.path.join(save_dir, f"{FILE_CONFIG['model_prefix']}_final_{timestamp}.pth")
+        tokenizer_path = os.path.join(save_dir, f"{FILE_CONFIG['tokenizer_prefix']}_{timestamp}.json")
+
+        torch.save({
+            'model_state_dict': model.state_dict(),
+            'config': config,
+            'tokenizer': tokenizer.stoi,
+            'training_losses': losses,
+            'timestamp': timestamp,
+            'global_configs': {
+                'MODEL_CONFIG': MODEL_CONFIG,
+                'TRAINING_CONFIG': TRAINING_CONFIG,
+                'GENERATION_CONFIG': GENERATION_CONFIG
+            }
+        }, model_path)
+
+        tokenizer.save(tokenizer_path)
+        print(f"   ✅ 模型已保存: {model_path}")
+        print(f"   ✅ 分词器已保存: {tokenizer_path}")
+
+        # 6. 交互式生成
+        print("\n6. 🎮 进入交互模式...")
+        interactive_generation(model, tokenizer)
+
+    except Exception as e:
+        print(f"❌ 程序执行出错: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+def auto_detect_and_run():
+    """自动检测并运行"""
+    print("🔍 GPT语言模型自动检测系统")
+    print("=" * 50)
+
+    available_models = get_available_models()
+
+    if available_models:
+        print(f"📂 检测到 {len(available_models)} 个现有模型")
+        print("🔄 自动加载最新模型...")
+
+        latest_model_path = available_models[0][1]
+        model, tokenizer = load_and_test_model(latest_model_path)
+
+        if model and tokenizer:
+            print("\n✅ 模型加载成功,进入交互式生成模式")
+            interactive_generation(model, tokenizer)
+        else:
+            print("❌ 模型加载失败,开始训练新模型...")
+            main()
+    else:
+        print("❌ 未检测到现有模型,开始训练新模型...")
+        main()
+
+
+if __name__ == "__main__":
+    # 自动检测并运行:有模型就加载,没有就训练
+    auto_detect_and_run()

+ 56 - 0
src/LinearAlgebra/main_matrix.py

@@ -0,0 +1,56 @@
+from playLA.Matrix import Matrix  ## 引入矩阵类
+from playLA.Vector import Vecotr
+
+if __name__ == "__main__":
+
+    matrix = Matrix([[1,2] , [3,4]])  ## 简单的二维数组
+    print(matrix)
+    print("matrix.shape = {}".format(matrix.shape()))
+    print("matrix.row_num = {}".format(matrix.row_num()))
+    print("matrix.col_num = {}".format(matrix.col_num()))
+    print("matrix.size = {}".format(matrix.size()))
+    print("len(matrix) = {}".format(len(matrix)))
+    print("matrix[0][0] = {}".format(matrix[0,0]))
+
+    matrix2 = Matrix([[5,6] , [7,8]])
+    print("add:{}".format(matrix+matrix2))
+    print("subtract:{}".format(matrix-matrix2))
+    print("scalar-mul:{}".format(matrix * 2))
+    print("scalar-mul:{}".format(2 * matrix))
+    print("truediv:{}".format(matrix / 2))
+    print("pos:{}".format(+matrix))
+    print("neg:{}".format(-matrix))
+    print("zero_2_3:{}".format(Matrix.zero(2,3)))
+
+    T = Matrix([[1.5 , 0] , [0,2]])
+    p = Vecotr([5,3])
+    print("T.dot(p) = {}".format(T.dot(p)))
+
+    P = Matrix([[0,4,5] , [0,0,3]])
+    print("T.dot(P) = {}".format(T.dot(P)))
+
+    ## 矩阵乘法交换律 ? A*B  B*A  (方阵) 结果不同~=! 不遵循交换律!
+    print("A.dot(B) = {}".format(matrix.dot(matrix2)))
+    print("B.dot(A) = {}".format(matrix2.dot(matrix)))
+
+
+
+    # 创建一个2×3矩阵
+    m = Matrix([[1, 2, 3], [4, 5, 6]])
+    print(m)  # Matrix([[1, 2, 3], [4, 5, 6]])
+    print(m.T())  # Matrix([[1, 4], [2, 5], [3, 6]])
+
+    # 验证转置性质
+    print(m.shape())  # (2, 3)
+    print(m.T().shape())  # (3, 2) 行数列数互换
+
+
+    ## 单位矩阵
+    I = Matrix.identity(2)
+    print(I)
+
+    print("A.dot(I) = {}".format(matrix.dot(I)))
+    print("I.dot(A) = {}".format(I.dot(matrix)))
+
+
+

+ 51 - 0
src/LinearAlgebra/main_matrix_transformation.py

@@ -0,0 +1,51 @@
+import math
+
+import matplotlib.pyplot as plt  # matrix plot library
+from numpy.matrixlib.defmatrix import matrix
+
+from playLA.Matrix import Matrix
+
+if __name__ == "__main__":
+
+    ## 对点集进行绘制 - F 图形
+    points = [[0,0] ,[0,5] , [3,5] , [3,4] , [1,4],
+              [1,3] , [2,3] , [2,2] , [1,2] , [1,0]]
+    ## 将points点集 拆乘 x y 两个分量 , 横、纵坐标放在一个列表中
+    x = [point[0] for point in points]
+    y = [point[1] for point in points]
+
+    plt.figure(figsize=(5,5))  ## 调整绘制窗口大小  , 单位英尺?英寸
+    plt.xlim(-10  , 10)  ## x 坐标轴范围
+    plt.ylim(-10  , 10)  ## y 坐标轴范围
+    ## 参数  - 要绘制的 横纵坐标 - x,y
+    plt.plot(x , y)
+    # plt.show()
+
+
+    ## 二维坐标列表放入矩阵中
+    P = Matrix(points)
+    ## 缩放矩阵 -  横坐标扩大2倍  , 纵坐标扩大1.5
+    # T = Matrix([[2,0] , [0,1.5]])
+    ## 翻转变换矩阵
+    # T = Matrix([[1,0] , [0,-1]])
+    ## 错切 x
+    # T = Matrix([[1,0.5] , [0 ,1]])
+
+    ## 错切 y
+    # T = Matrix([[1,0] , [0.5 ,1]])
+
+
+    ## 旋转  - 通常 在数学库中 , 对于旋转的角度 都是使用 弧度制  - 60° ->   Π/3
+    theta  = math.pi / 3
+    ## 图形沿着原点  顺时针旋转 theta 度
+    T  = Matrix([[math.cos(theta) , math.sin(theta)] , [-math.sin(theta) , math.cos(theta)]])
+
+    ## 矩阵乘法 乘点坐标 P需要转置 才能相乘
+    P2 = T.dot(P.T())
+    ## 直接分离 P2 的横纵坐标
+    plt.plot([P2.col_vector(i)[0] for i in range(P2.col_num())] ,  ## 从列向量中取出 第i个列向量 , 他的第0个分量 对应每个点的横坐标
+             [P2.col_vector(i)[1] for i in range(P2.col_num())])  ## 从列向量中取出 第i个列向量 , 他的第1个分量 对应每个点的纵坐标
+    plt.show()
+
+
+

+ 52 - 0
src/LinearAlgebra/main_numpy_matrix.py

@@ -0,0 +1,52 @@
+import numpy as np  # 引入numpy库  np简化别名
+
+if __name__ == "__main__":
+
+    A = np.matrix([[1,2] , [3,4]])
+    print(A)
+    ## 矩阵属性
+    print(A.shape)
+    print(A.T)
+    ## 获取矩阵的元素
+    print(A[1,1])
+    print(A[0])
+    print(A[:,0])
+
+    ## 矩阵基本运算
+    print("矩阵基本运算")
+    B = np.array([[5,6] , [7,8]])
+    print(A + B )
+    print(A - B )
+    print(10 * A )
+    print(A * 10)
+    print(A * B )
+    print(A.dot(B))
+
+    p = np.array([10 , 100])
+    print(A + p)
+    print(A + 1)
+    print(A.dot(p))
+
+
+    ## 单位矩阵
+    I = np.identity(2)
+    print(I)
+    print(A.dot(I))
+    print(I.dot(A))
+
+    # 逆矩阵 -    np.linalg -> linear algebra 子模块下 调用方法
+    invA = np.linalg.inv(A)
+    print(invA)
+    # A的逆矩阵乘A = I 单位矩阵  (计算机存在误差 结果 可能不是0  1.11022302e-16)
+    # 类设计  - 当判断一个浮点数是否为零的时候, _global中设置了 EPSILONE 精度至  , 如果小于这个精度就说是等于零
+    print(invA.dot(A))
+    print(A.dot(invA))
+
+    ## 试验  - 只有方阵 才存在逆矩阵
+    try :
+        C = np.array([[1, 2, 3], [4, 5, 6]])
+        np.linalg.inv(C)
+    except Exception as ex:
+        print(f"--- Caught an exception! ---")
+        print(f"Error Type: {type(ex).__name__}")
+        print(f"Error Message: {str(ex)}")  # 最推荐的获取消息的方式

+ 91 - 0
src/LinearAlgebra/main_numpy_vector.py

@@ -0,0 +1,91 @@
+import numpy as np  # 引入numpy库  np简化别名
+
+if __name__ == "__main__":
+
+
+    # 查看当前np对应版本号
+    print(np.__version__)
+    lst = [1,2,3]  # python 自身就带有列表的概念
+    """"在这样的基础上为什么还要封装 numpy 的 向量 相应的对象 ? 
+    难道不吭可以直接使用python本身的列表来表示向量?
+    除了和面向对象相关,我们把它封装成一个专门的向量对象
+    可以非常方便的定义属于这个对象的相应的方法
+    还有非常重要的: python的列表中 本身里面是可以存储任意类型的数据的
+    即使这些数据类型不一致
+    """
+    # 比如:
+    lst[0] = "linear algebra"
+    print(lst)
+    """python 列表的本质 其实 是一个 动态的数组
+    对于数组这种数据结构 , 它本质是为了存储数据 
+    而并不是为了数学计算 
+    而 numpy 里的向量 只能存储一种数据类型
+    我们基于numpy对象进行运算速度是非常快的"""
+
+
+    ## 声明一些numpy向量
+    vec = np.array([1,2,3])  # 传入python列表
+    print(vec)
+
+    ## 可更改的向量类
+    vec[0] = 666
+    print(vec)
+    vec[0] = 1
+
+
+    ## 创建零向量
+    print(np.zeros(6))  ## 默认创建向量里的数据类型是浮点型  [0. 0. 0. 0. 0. 0.]
+    ## one向量
+    print(np.ones(6))
+    ## 统一向量
+    print(np.full(6 , 666))
+
+
+    ## 向量的基本属性 :
+    ## 向量有多少元素
+    print( "size = ", vec.size)
+    ## np中重载的 len 函数
+    print(len(vec))
+    print(vec[0]) # 第一个元素
+    print(vec[-1]) # 最后一个元素
+    print(vec[0:2]) # 切片方式查看前两位
+    """也就是说把原向量 前两个维度抽离出来 组成了一个新的二维向量"""
+    print(type(vec[0:2])) # <class 'numpy.ndarray'>
+
+    ## np.array 的基本运算
+    vec2 = np.array([3,5,6])
+    vec2[0] = 4
+    ## 向量相加
+    print("{} + {} = {}".format(vec , vec2 , vec + vec2))
+    ## 相减
+    print("{} - {} = {}".format(vec , vec2 , vec - vec2))
+    ## 数量乘法
+    print("{} * {} = {}".format(2 , vec2 , 2 *  vec2))
+    ## 数量乘法 - 标量 右侧
+    print("{} * {} = {}".format(vec2 , 2 , vec2 *  2))
+    ## 向量相乘 - element-wise multiplication  两个向量分量逐个相乘得到的向量结果 (这样的乘法是没有实际数学意义)
+    print("{} * {} = {}".format(vec , vec2 , vec *  vec2))
+    ## 点乘
+    print("{}.dot({}) = {}".format(vec , vec2 , vec.dot(vec2)))
+    ## 求模   np.linalg -> linear algebra 子模块下 调用方法
+    print(np.linalg.norm(vec))
+    ## 向量的规范化 / 归一化 / 单位向量
+    print(vec / np.linalg.norm(vec)); """"将vec这个向量中的每一个元素都除以这个向量的模 , 得到单位向量"""
+    ## 求单位向量的模 1
+    print(np.linalg.norm(vec / np.linalg.norm(vec)))
+
+    """由于numpy中没有封装求一个向量的单位向量方法
+    所以在做除法的时候 如果 除数 是0  , 这种情况 需要自行处理"""
+    try:
+        zero3 = np.zeros(3)
+        zero3 / np.linalg.norm(zero3)
+    except RuntimeWarning:
+        print("除数不能为向量~")
+
+
+
+
+
+
+
+

+ 41 - 0
src/LinearAlgebra/main_vector.py

@@ -0,0 +1,41 @@
+from playLA.Vector import Vecotr
+
+if __name__ == "__main__":
+    vec = Vecotr([5,2])
+    print(vec)
+    print(len(vec))
+    print("vec[0] = {} , vec[1] = {}".format(vec[0] , vec[1]))
+    vec2 = Vecotr([3,1])
+    print("{} + {} = {}".format(vec , vec2 , vec + vec2))
+    print("{} - {} = {}".format(vec , vec2 , vec - vec2))
+    print("{} * {} = {}".format(vec , 3 , vec * 3))
+    print("{} * {} = {}".format(3, vec, 3 * vec))
+
+    print("+{} = {}".format(vec, +vec))
+    print("-{} = {}".format(vec, -vec))
+
+    zero2 = Vecotr.zero(2)
+    print(zero2)
+    print("{} + {} = {}".format(vec , zero2 , vec + zero2))
+
+    # 向量模
+    print("norm({}) = {}".format(vec,vec.norm()))
+    print("norm({}) = {}".format(zero2,zero2.norm()))
+
+    # 单位向量
+    print("normalize {} is {} ".format(vec , vec.normalize()))
+    print(vec.normalize().norm())
+
+    print("normalize {} is {} ".format(vec2 , vec2.normalize()))
+    print(vec2.normalize().norm())
+
+    try:
+        print(zero2.normalize().norm())
+    except ZeroDivisionError:
+        print("Cannot normalize zero vector {}".format(zero2))
+
+    # 点乘
+    print(vec.dot(vec2))
+
+
+

BIN
src/LinearAlgebra/my_gpt_model.pth


BIN
src/LinearAlgebra/my_gptmodel/best_model_epoch_150.pth


+ 132 - 0
src/LinearAlgebra/playLA/Matrix.py

@@ -0,0 +1,132 @@
+from .Vector import Vecotr   ##  在当前这个包的Vector文件中 引入vector类
+
+
+class Matrix :
+
+    def __init__(self , list2d):  ## 二维数组 表示矩阵
+        self._values = [row[:] for row in list2d]  ## 取出 list2d 中的每一行 , 把每行的内容都复制一遍 , 形成一个新的列表 , 这些列表又组成 一个新列表(二维数组) [:]“全选”的切片操作
+
+    @classmethod   ## 类方法 , 调用这个发放需要重建矩阵实例 ,直接在矩阵类上调用
+    def zero(cls , r, c):
+        """返回一个r行c列的零矩阵
+        @classmethod 的核心作用 它允许你在不创建类的实例的情况下,调用这个方法,并且这个方法可以访问和操作类本身。
+          @staticmethod"""
+        return cls([[0] * c for _ in range(r)])
+
+    @classmethod
+    def identity(cls , n):
+        """"返回一个n行n列的单位矩阵"""
+        m = [[0] * n for _ in range(n)]   ## 二位列表 n行 每一行都有n个 0
+        for i  in range(n) : # 从 0 - n
+            m[i][i] = 1;     # 第i行i列改为1 , 其他保持0
+        return  cls(m)   # 返回matrix对象
+
+    def T(self):
+        """返回矩阵的转置矩阵
+        将原矩阵的行转为列,列转为行
+        """
+        # 使用列表推导式创建转置矩阵
+        # 对于每一列j,取出所有行在该列的元素组成新的行
+        return Matrix([[self._values[i][j] for i in range(self.row_num())]
+                      for j in range(self.col_num())])
+
+    def shape(self):
+        """返回矩阵的形状 : (行数 , 列数)
+        由于包含 (行数 , 列数) 两个信息 所以通常是返回 二元组 形式
+        行 : len(self._values)  列表的长度
+        列 : len(self._values[0]) 第一个行向量 , 里面有几个元素 相应就是有几列  (对于这个矩阵来说,每一行个数都一样 , 并且矩阵至少要有一行) """
+        return len(self._values) , len(self._values[0])
+
+    def dot (self , another):
+        """返回矩阵乘法结果"""
+        if isinstance(another , Vecotr): # 传入是否是向量
+            ## 矩阵和向量的乘法  矩阵列数是否等于向量的元素数量?
+            assert self.col_num() == len(another) ,\
+                "Error in Matrix-Vector Multiplication."
+            ## 矩阵 * 向量 得到的还是向量
+            return Vecotr([self.row_vector(i).dot(another) for i in range(self.row_num())])
+        if isinstance(another , Matrix) : # 传入是否是矩阵
+            ## 矩阵和矩阵的乘法    前矩阵列数 必须等于 后矩阵行数
+            assert  self.col_num() == another.row_num() ,\
+                "Error in Matrix-Matrix Multiplication . "
+            return Matrix([[self.row_vector(i).dot(another.col_vector(j))
+                            for j in range(another.col_num())]
+                           for i in range(self.row_num())])
+        """每一次从前面的矩阵拿出一行 对于这一行向量 - 每次从后面的矩阵拿出一列
+        前面的第i行和后面的第j列 进行点乘  点乘的结果就算结果矩阵的第 ij个元素 - i行j列"""
+
+    def row_num(self):
+        """返回矩阵的行数
+        shape 元组的第1个 元素 就是 矩阵 行数"""
+        return self.shape()[0]
+
+    def col_num(self):
+        """返回矩阵列数
+        shape 元组的第2个 元素 就是 矩阵 列数"""
+        return self.shape()[1]
+
+    def size(self):
+        """返回矩阵的元素个数"""
+        r ,c = self.shape()
+        return r*c
+
+    def row_vector(self , index):
+        """返回矩阵的第index个行向量"""
+        return  Vecotr(self._values[index])
+    def col_vector(self , index):
+        """返回矩阵的第index个列向量
+        取出矩阵对应每一行 把每一行的第index个元素拿出来 , 构成一个向量"""
+        return Vecotr([row[index] for row in self._values])
+
+
+    ## 打印的时候显示
+    def __repr__(self):
+        return "Matrix({}).".format(self._values)
+    __str__ = __repr__  ## 让他们相等 不区分方法返回字符串
+
+    __len__ = row_num ## 等同于 二维列表多少行
+
+    def __getitem__(self, pos):
+        """返回矩阵pos位置的元素  ,  利用元组 传递行、列"""
+        r , c = pos  ##  元组解包(Tuple Unpacking)
+        return self._values[r][c]
+    def __add__(self, another):
+        """返回两个矩阵的加法结果
+        先判断两个矩阵形状一样"""
+        assert  self.shape() == another.shape(),\
+            "Error in adding . Shape of matrix must be same ."
+        """python特有的列表表达式 - 更加 pythonic !
+        新建一个新的matrix类 传入一个新的 加和 二维数组"""
+        return Matrix([[a + b for a,b in zip(self.row_vector(i) , another.row_vector(i))]
+                       for i in range(self.row_num())])
+    def __sub__(self, another):
+        """返回两个矩阵的减法结果
+        先判断两个矩阵形状一样"""
+        assert  self.shape() == another.shape(),\
+            "Error in subtracting . Shape of matrix must be same ."
+        """python特有的列表表达式 - 更加 pythonic !
+        新建一个新的matrix类 传入一个新的 加和 二维数组"""
+        return Matrix([[a - b for a,b in zip(self.row_vector(i) , another.row_vector(i))]
+                       for i in range(self.row_num())])
+    def __mul__(self, k):
+        """返回矩阵的数量乘结果 : self * k
+        创建新的矩阵 - 传入二维列表"""
+        return Matrix([[e * k for  e in self.row_vector(i)]
+                       for i  in range(self.row_num())])
+    def __rmul__(self, k):
+        """返回矩阵的数量乘结果 - 右乘 : k * self
+        一个数字乘一个矩阵是拥有交换律的 : 可以使用 mul 结果一致"""
+        return self * k
+    def __truediv__(self, k):
+        """"返回数量除法的结果 : self / k
+        可以转成数量的乘法 , 也就是用 1/k 乘 当前矩阵"""
+        return (1 / k) * self
+    def __pos__(self):
+        """返回矩阵取正的结果"""
+        return 1 * self
+    def __neg__(self):
+        """返回矩阵取负的结果"""
+        return -1 * self  ## 矩阵的数量乘法来表示
+
+
+

+ 80 - 0
src/LinearAlgebra/playLA/Vector.py

@@ -0,0 +1,80 @@
+import  math
+from ._global import EPSILONE  ## .是从当前的包中 _global中 模块导入 epsilon变量
+
+class Vecotr :
+    def __init__(self  ,  lst):
+        # 调用构造的时候传入的是一个引用之  , 如调用处 这个值修改的话,就会影响_values值
+        self._values = list(lst)  # 利用list构造函数 , 相当于复制一份
+    @classmethod
+    def zero(cls , dim):
+        """返回一个dim维的零向量"""
+        return cls([0] * dim)
+    def norm(self):
+        """返回向量的模"""
+        return math.sqrt(sum(e ** 2 for e in self))
+    def normalize(self):
+        """返回向量的单位向量  -  向量中每一个分量除以此向量的模 最终组成返回的单位向量"""
+        # return Vecotr([e / self.norm() for e in self])  这样不优  , 因为重复计算了norm
+        # norm提到外面  , 并且向量创建一个副本  ,ps: 直接用 self 会修改原始向量,而创建副本则不会
+        # return 1/self.norm() * Vecotr(self._values)
+        # 判断零向量 , 如果是浮点数 , 和0进行判断 是不能直接使用 == 的  是因为计算机中 浮点计算很容易产生误差
+        #norm 是一个正数 因为它是开跟出来的
+        # if self.norm() == 0 :
+        if self.norm()  < EPSILONE : # 和一个极小的值作比较
+            raise ZeroDivisionError("Normalize error !  norm is zero ~ ")
+        return Vecotr(self._values) / self.norm()
+    def dot(self,another):
+        """向量点乘 , 返回结果标量
+        两个向量进行点乘 , 维度必须相等"""
+        assert len(self) == len(another),\
+            "Error in dot product . Length of vectors must same ."
+        return sum(a * b for a , b in zip(self , another))
+
+
+
+
+
+    def __repr__(self):
+        return "Vector({})".format(self._values)
+    def __str__(self):
+        return "({})".format(", ".join( str(e) for e in self._values))
+    def __len__(self):
+        """返回向量长度 (有多少个元素)"""
+        return len(self._values)
+    def __getitem__(self, index):
+        """取向量的第index个元素"""
+        return self._values[index]
+    def  __add__(self, another):
+        """"向量加法 , 返回结果向量"""
+        assert len(self) == len(another), \
+            "Error in adding  . Length of vectors must be same . "
+        # return Vecotr([a + b for a,b in zip(self._values , another._values)])  # 尽量不去访问私有成员变量 _下划线约定
+        return Vecotr([a + b for a, b in zip(self, another)])  #可迭代不需要访问 __values
+    def __sub__(self, another):
+        """向量剑法  , 返回结果向量"""
+        assert  len(self) == len(another), \
+            "Error in subtracting . Length of vectors must be same ."
+        return Vecotr([a- b for a,b in zip(self , another)])
+    def __iter__(self):
+        """"返回向量的迭代器"""
+        return self._values.__iter__()  # 列表本身就有迭代器
+    def __mul__(self, k):
+        """返回数量乘法的结果向量 : self * k"""
+        return Vecotr([k * e for e in self])
+    def __rmul__(self, k):
+        """返回数量乘法的结果向量 : k * self """
+        return self * k
+    def __pos__(self):
+        """"返回向量取正的结果向量"""
+        return 1* self
+    def __neg__(self):
+        """返回向量取负的结果向量"""
+        return -1 * self
+    def __truediv__(self, k):
+        """返回数量除法的结果向量  -  / // python3 中区分普通的除法、整数除法
+        由于实现的向量类中 , 每一个元素不一定都是整数  所以覆盖 __truediv__ 魔法方法
+        这个数量除法本质就是 数量乘法 , 为了编程方便 , 造了一个这个概念
+        其结果就相当于是  self / k"""
+        return (1/k) * self
+
+

+ 0 - 0
src/LinearAlgebra/playLA/__init__.py


+ 2 - 0
src/LinearAlgebra/playLA/_global.py

@@ -0,0 +1,2 @@
+## Shared inside the package; not meant to be accessed by users of the package.
+EPSILONE = 1e-8  ## Precision threshold: values below it are treated as zero.

+ 176 - 0
src/LinearAlgebra/readme_deep.txt

@@ -0,0 +1,176 @@
+GPT模型结构:
+├── 输入嵌入层 (Token Embedding)
+├── 位置编码层 (Positional Encoding)
+├── Transformer块 × N层:
+│   ├── 层归一化
+│   ├── 因果自注意力机制
+│   ├── 层归一化
+│   └── 前馈神经网络
+├── 最终层归一化
+└── 语言模型头 (输出层)
+
+
+
+
+关键特性:
+
+✅ 因果掩码: 确保模型只能看到左侧的token
+
+✅ 多头注意力: 并行处理不同表示子空间
+
+✅ 权重共享: 输入输出嵌入权重共享
+
+✅ 位置编码: 可学习的位置嵌入
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+        """"将下载好的模型放在~/.cache/torch/checkpoints文件夹中即可(windows为C:\用户名\.cache\torch\.checkpoints)
+
+Resnet:
+
+model_urls = {
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+
+inception:
+
+model_urls = {
+    # Inception v3 ported from TensorFlow
+    'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
+}
+
+Densenet:
+
+model_urls = {
+    'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
+    'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
+    'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
+    'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
+}
+
+Alexnet:
+
+model_urls = {
+    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
+}
+
+vggnet:
+
+model_urls = {
+    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
+    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
+    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
+    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
+    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
+    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
+    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
+    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
+}"""
+
+
+
+
+训练流程
+
+训练步骤:
+1. 数据准备 → 创建字符级分词器
+2. 样本生成 → 滑动窗口创建训练对
+3. 模型配置 → 设置超参数
+4. 训练循环 → 前向传播 + 反向传播
+5. 模型保存 → 保存权重和配置
+
+
+
+生成策略:
+- 温度控制 (temperature):
+  - <1.0: 更确定性的输出
+  - 1.0: 原始分布
+  - >1.0: 更多样化的输出
+
+- Top-K采样:
+  - 只从概率最高的K个token中采样
+  - 避免选择低概率的token
+
+
+
+
+  # 输入
+start_text = "从前有座山"
+
+# 生成过程
+1. 编码: "从前有座山" → [23, 45, 67, 89, 12]
+2. 自回归生成:
+   输入: [23, 45, 67, 89, 12] → 预测下一个token: 34
+   输入: [23, 45, 67, 89, 12, 34] → 预测下一个token: 56
+   ... 重复直到生成长度达到100
+3. 解码: [23, 45, 67, 89, 12, 34, 56, ...] → "从前有座山,山里有座庙..."
+
+# 输出示例:
+"从前有座山,山里有座庙,庙里有个老和尚在给小和尚讲故事..."
+
+
+
+
+
+配置选项:
+- vocab_size: 词汇表大小 (默认: 50257)
+- n_layer: Transformer层数 (默认: 12)
+- n_head: 注意力头数 (默认: 12)
+- n_embd: 嵌入维度 (默认: 768)
+- max_seq_len: 最大序列长度 (默认: 1024)
+- dropout: 丢弃率 (默认: 0.1)
+
+
+
+
+扩展功能:
+1. 从预训练模型加载
+2. 注意力权重可视化
+3. 自定义分词器
+4. 多种采样策略
+
+
+
+
+
+文本数据 → 分词器 → 训练样本 → GPT模型训练 → 模型保存
+     ↓
+文本生成 ← 解码 ← 采样 ← 前向传播 ← 输入提示
+
+
+
+🎯 主要应用场景
+文本生成: 故事创作、对话生成
+
+语言建模: 学习文本数据的概率分布
+
+教育演示: 理解Transformer架构
+
+研究实验: NLP模型的基础构建块
+
+📊 性能特点
+| 特性 | 说明 |
+| --- | --- |
+| 参数量 | 可配置,从几万到数亿参数 |
+| 训练数据 | 支持自定义文本数据 |
+| 生成质量 | 依赖训练数据和模型大小 |
+| 计算需求 | GPU加速训练,CPU可推理 |