|
|
@@ -0,0 +1,1283 @@
|
|
|
+import torch
|
|
|
+import torch.nn as nn
|
|
|
+import torch.nn.functional as F
|
|
|
+import math
|
|
|
+import json
|
|
|
+import os
|
|
|
+import time
|
|
|
+from typing import List, Optional, Tuple, Dict
|
|
|
+from datetime import datetime
|
|
|
+import glob
|
|
|
+
|
|
|
# ==================== Global configuration ====================

# Model configuration -- a larger model for better output quality
MODEL_CONFIG = {
    'n_layer': 8,        # number of transformer layers
    'n_head': 8,         # number of attention heads
    'n_embd': 256,       # embedding / hidden dimension
    'max_seq_len': 512,  # maximum context length (positions)
    'dropout': 0.1,
    'bias': True,        # use bias terms in Linear layers
}

# Training configuration -- tuned training hyper-parameters
TRAINING_CONFIG = {
    'epochs': 2000,
    'batch_size': 16,
    'learning_rate': 6e-4,
    'block_size': 256,     # training sequence length (tokens per sample)
    'weight_decay': 0.01,
    'grad_clip': 1.0,      # gradient-norm clipping threshold
    'warmup_epochs': 50,
    'min_loss': 0.05,  # target minimum loss -- training may stop early once reached
}

# Generation configuration -- global defaults, no user input required
GENERATION_CONFIG = {
    'max_tokens': 900,
    'temperature': 0.7,
    'top_k': 40,
    'top_p': 0.85,
    'repetition_penalty': 1.1,
}

# File locations
FILE_CONFIG = {
    'save_dir': "my_gptmodel",
    'training_data_file': "training_data.txt",
    'programming_data_file': "programming_data.txt",
    'model_prefix': "gpt_model",
    'tokenizer_prefix': "tokenizer",
}

# Training-data configuration
TRAINING_DATA_CONFIG = {
    'data_repetition': 5,    # repeat the corpus this many times to enlarge the set
    'min_text_length': 500,  # minimum characters for an external data file to be used
}
|
|
|
+
|
|
|
+
|
|
|
+# ==================== 模型类定义 ====================
|
|
|
+
|
|
|
class GPTConfig:
    """Hyper-parameter bundle for the GPT model.

    Defaults mirror the module-level MODEL_CONFIG; ``vocab_size`` defaults
    to the GPT-2 vocabulary size and is normally overridden with the
    tokenizer's actual vocabulary size.
    """

    def __init__(
        self,
        vocab_size: int = 50257,
        n_layer: int = MODEL_CONFIG['n_layer'],
        n_head: int = MODEL_CONFIG['n_head'],
        n_embd: int = MODEL_CONFIG['n_embd'],
        max_seq_len: int = MODEL_CONFIG['max_seq_len'],
        dropout: float = MODEL_CONFIG['dropout'],
        bias: bool = MODEL_CONFIG['bias'],
    ):
        self.vocab_size = vocab_size    # tokenizer vocabulary size
        self.n_layer = n_layer          # number of transformer blocks
        self.n_head = n_head            # attention heads per block
        self.n_embd = n_embd            # embedding / hidden width
        self.max_seq_len = max_seq_len  # maximum context length
        self.dropout = dropout          # dropout probability
        self.bias = bias                # whether Linear layers carry a bias

    def __str__(self):
        return (
            f"GPTConfig(vocab_size={self.vocab_size}, n_layer={self.n_layer}, "
            f"n_head={self.n_head}, n_embd={self.n_embd})"
        )
|
|
|
+
|
|
|
+
|
|
|
class OptimizedCausalSelfAttention(nn.Module):
    """Multi-head causal self-attention with an explicit lower-triangular mask."""

    def __init__(self, config: GPTConfig):
        super().__init__()
        assert config.n_embd % config.n_head == 0

        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.head_size = config.n_embd // config.n_head

        # Fused query/key/value projection and the output projection.
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
        self.attn_dropout = nn.Dropout(config.dropout)
        self.resid_dropout = nn.Dropout(config.dropout)

        # Precomputed causal mask, shaped (1, 1, T, T). The buffer keeps the
        # original name "bias" so existing state_dicts stay loadable.
        mask = torch.tril(torch.ones(config.max_seq_len, config.max_seq_len))
        self.register_buffer(
            "bias", mask.view(1, 1, config.max_seq_len, config.max_seq_len)
        )

        self.scale = 1.0 / math.sqrt(self.head_size)

    def forward(self, x):
        batch, seq, width = x.size()

        # Project once, then split into query / key / value.
        query, key, value = self.c_attn(x).split(self.n_embd, dim=2)
        per_head = (batch, seq, self.n_head, self.head_size)
        query = query.view(*per_head).transpose(1, 2)
        key = key.view(*per_head).transpose(1, 2)
        value = value.view(*per_head).transpose(1, 2)

        # Scaled dot-product scores, masked so each position only attends
        # to itself and earlier positions.
        scores = torch.matmul(query, key.transpose(-2, -1)) * self.scale
        scores = scores.masked_fill(self.bias[:, :, :seq, :seq] == 0, float('-inf'))
        weights = self.attn_dropout(F.softmax(scores, dim=-1))

        out = torch.matmul(weights, value)
        out = out.transpose(1, 2).contiguous().view(batch, seq, width)
        return self.resid_dropout(self.c_proj(out))
|
|
|
+
|
|
|
+
|
|
|
class OptimizedMLP(nn.Module):
    """Position-wise feed-forward network: 4x expansion, GELU, projection back."""

    def __init__(self, config: GPTConfig):
        super().__init__()
        hidden = 4 * config.n_embd  # standard 4x inner width
        self.c_fc = nn.Linear(config.n_embd, hidden, bias=config.bias)
        self.gelu = nn.GELU()
        self.c_proj = nn.Linear(hidden, config.n_embd, bias=config.bias)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        # expand -> nonlinearity -> project -> dropout
        return self.dropout(self.c_proj(self.gelu(self.c_fc(x))))
|
|
|
+
|
|
|
+
|
|
|
class OptimizedBlock(nn.Module):
    """Pre-norm transformer block: attention then MLP, each behind a residual."""

    def __init__(self, config: GPTConfig):
        super().__init__()
        self.ln_1 = nn.LayerNorm(config.n_embd, eps=1e-5)
        self.attn = OptimizedCausalSelfAttention(config)
        self.ln_2 = nn.LayerNorm(config.n_embd, eps=1e-5)
        self.mlp = OptimizedMLP(config)

    def forward(self, x):
        # Residual connections around the normalized sub-layers.
        x = x + self.attn(self.ln_1(x))
        return x + self.mlp(self.ln_2(x))
|
|
|
+
|
|
|
+
|
|
|
class OptimizedGPT(nn.Module):
    """GPT language model.

    Token + learned positional embeddings feed a stack of pre-norm
    transformer blocks; the LM head shares its weight matrix with the
    token embedding (weight tying).
    """

    def __init__(self, config: GPTConfig):
        super().__init__()
        self.config = config

        self.wte = nn.Embedding(config.vocab_size, config.n_embd)   # token embeddings
        self.wpe = nn.Embedding(config.max_seq_len, config.n_embd)  # positional embeddings
        self.drop = nn.Dropout(config.dropout)

        self.blocks = nn.ModuleList([OptimizedBlock(config) for _ in range(config.n_layer)])
        self.ln_f = nn.LayerNorm(config.n_embd, eps=1e-5)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Weight tying: embedding matrix and output projection share parameters.
        self.wte.weight = self.lm_head.weight

        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Initialize weights: N(0, 0.02) for Linear/Embedding, zero Linear biases."""
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx, targets=None):
        """Run the model.

        Args:
            idx: (B, T) token ids, T <= config.max_seq_len.
            targets: optional (B, T) next-token ids used to compute the loss.

        Returns:
            (logits, loss): logits is (B, T, vocab_size); loss is the mean
            cross-entropy when ``targets`` is given, otherwise None.
        """
        device = idx.device
        b, t = idx.size()

        assert t <= self.config.max_seq_len, f"序列长度{t}超过最大长度{self.config.max_seq_len}"

        pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)

        tok_emb = self.wte(idx)
        pos_emb = self.wpe(pos)
        x = self.drop(tok_emb + pos_emb)

        for block in self.blocks:
            x = block(x)

        x = self.ln_f(x)

        logits = self.lm_head(x)
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))

        return logits, loss

    def generate(self, idx, max_new_tokens=None, temperature=None, top_k=None, top_p=None, repetition_penalty=None):
        """Autoregressively sample tokens continuing the prompt ``idx``.

        Any parameter left as None falls back to GENERATION_CONFIG. Sampling
        applies, in order: repetition penalty, temperature scaling, top-k
        filtering, then nucleus (top-p) filtering.

        Args:
            idx: (1, T) prompt token ids (batch size 1 is assumed by the
                repetition-penalty indexing).

        Returns:
            (1, T + n) tensor of prompt plus up to ``max_new_tokens`` ids.
        """
        max_new_tokens = max_new_tokens or GENERATION_CONFIG['max_tokens']
        temperature = temperature or GENERATION_CONFIG['temperature']
        top_k = top_k if top_k is not None else GENERATION_CONFIG['top_k']
        top_p = top_p if top_p is not None else GENERATION_CONFIG['top_p']
        repetition_penalty = repetition_penalty or GENERATION_CONFIG['repetition_penalty']

        generated_sequence = []

        for _ in range(max_new_tokens):
            # Crop the context window to the model's maximum sequence length.
            idx_cond = idx if idx.size(1) <= self.config.max_seq_len else idx[:, -self.config.max_seq_len:]

            logits, _ = self(idx_cond)
            logits = logits[:, -1, :]

            # Repetition penalty (CTRL-style). BUGFIX: the previous code always
            # DIVIDED by the penalty, which made already-generated tokens with
            # NEGATIVE logits *more* likely. Positive logits are divided and
            # negative logits multiplied, so the penalty always discourages.
            if repetition_penalty != 1.0:
                for token in set(generated_sequence):
                    if logits[0, token] > 0:
                        logits[0, token] /= repetition_penalty
                    else:
                        logits[0, token] *= repetition_penalty

            # Temperature scaling.
            if temperature != 1.0:
                logits = logits / temperature

            # Top-K filtering: mask everything below the k-th best logit.
            if top_k is not None and top_k > 0:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, -1].unsqueeze(-1)] = -float('Inf')

            # Top-P (nucleus) filtering: keep the smallest prefix of tokens
            # whose cumulative probability exceeds top_p.
            if top_p is not None and top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

                # Shift the removal mask right so the first token crossing the
                # threshold is always kept.
                sorted_indices_to_remove = cumulative_probs > top_p
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0

                indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                logits[indices_to_remove] = -float('Inf')

            probs = F.softmax(logits, dim=-1)

            # Stop if filtering masked every token out.
            if torch.all(probs == 0):
                break

            idx_next = torch.multinomial(probs, num_samples=1)
            generated_sequence.append(idx_next.item())
            idx = torch.cat((idx, idx_next), dim=1)

        return idx
|
|
|
+
|
|
|
+
|
|
|
class CharTokenizer:
    """Character-level tokenizer with JSON save/load.

    Maps each character to an integer id (``stoi``) and back (``itos``).
    Unknown characters encode to id 0 and decode to ``self.unknown_token``.
    """

    def __init__(self, text: str = None, stoi: Dict = None):
        """Build from raw text (vocabulary = sorted unique characters) or an
        existing char->id mapping.

        Raises:
            ValueError: if neither ``text`` nor ``stoi`` is provided.
        """
        if stoi is not None:
            self.stoi = self._normalize_stoi(stoi)
        elif text is not None:
            chars = sorted(set(text))
            self.stoi = {ch: i for i, ch in enumerate(chars)}
        else:
            raise ValueError("必须提供text或stoi参数")

        self.itos = {v: k for k, v in self.stoi.items()}  # reverse mapping
        self.vocab_size = len(self.stoi)
        self.unknown_token = '?'  # shown when decoding an out-of-vocab id

    def _normalize_stoi(self, stoi_dict: Dict) -> Dict:
        """Normalize a loaded mapping to ``{char: int}``.

        Supports a legacy format whose keys are character ordinals, either as
        ints or as multi-digit strings (e.g. "20320" -> chr(20320)).

        BUGFIX: single-character keys are now kept verbatim. Previously a
        literal digit character such as "5" was converted via chr(int("5")),
        corrupting any vocabulary containing digits after a save()/load()
        round trip (save() writes literal character keys).
        """
        normalized = {}
        for k, v in stoi_dict.items():
            if isinstance(k, int):
                # Legacy: int key is a character ordinal.
                normalized[chr(k)] = int(v)
            elif isinstance(k, str) and len(k) > 1 and k.isdigit():
                # Legacy: multi-digit string key is a character ordinal.
                normalized[chr(int(k))] = int(v)
            else:
                # Normal case: the key is the character itself.
                normalized[k] = int(v)
        return normalized

    def encode(self, text: str) -> List[int]:
        """Map text to ids; unknown characters fall back to id 0."""
        return [self.stoi.get(ch, 0) for ch in text]

    def decode(self, indices: List[int]) -> str:
        """Map ids back to text; unknown ids become ``self.unknown_token``."""
        return ''.join([self.itos.get(i, self.unknown_token) for i in indices])

    def save(self, filepath: str):
        """Write the char->id mapping to ``filepath`` as UTF-8 JSON."""
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.stoi, f, ensure_ascii=False, indent=2)

    @classmethod
    def load(cls, filepath: str):
        """Reconstruct a tokenizer from a JSON file written by :meth:`save`."""
        with open(filepath, 'r', encoding='utf-8') as f:
            stoi = json.load(f)
        tokenizer = cls(stoi=stoi)
        return tokenizer

    def __str__(self):
        return f"CharTokenizer(vocab_size={self.vocab_size})"
|
|
|
+
|
|
|
+
|
|
|
+# ==================== 自动数据收集 ====================
|
|
|
+
|
|
|
def collect_training_data():
    """Assemble the training corpus.

    Combines, in order: (1) a built-in Python programming sample, (2) any
    external data files found under FILE_CONFIG['save_dir'] that exceed
    TRAINING_DATA_CONFIG['min_text_length'], and (3) a built-in extended
    sample used only when the total is below 10k characters. The combined
    text is then repeated TRAINING_DATA_CONFIG['data_repetition'] times.

    Returns:
        str: the full (repeated) training corpus.
    """
    data_sources = []

    # 1. Built-in programming data (a literal corpus string -- its contents,
    # including the '#' lines inside it, are training data, not comments).
    programming_data = """
# Python完整知识库
def calculate_factorial(n):
    if n == 0 or n == 1:
        return 1
    else:
        return n * calculate_factorial(n-1)

class Student:
    def __init__(self, name, age, grade):
        self.name = name
        self.age = age
        self.grade = grade
        self.subjects = []

    def add_subject(self, subject):
        self.subjects.append(subject)

    def get_average(self, scores):
        if not scores:
            return 0
        return sum(scores) / len(scores)

def read_file_safely(filename):
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        return "文件不存在"

# 数据结构和算法
def binary_search(arr, target):
    left, right = 0, len(arr) - 1
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1

# 面向对象编程示例
class Animal:
    def __init__(self, name, species):
        self.name = name
        self.species = species

    def speak(self):
        return "动物发出声音"

class Dog(Animal):
    def __init__(self, name, breed):
        super().__init__(name, "犬科")
        self.breed = breed

    def speak(self):
        return "汪汪!"

# 文件操作类
class FileProcessor:
    def __init__(self, filename):
        self.filename = filename

    def read_content(self):
        try:
            with open(self.filename, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            return f"错误: {e}"

    def write_content(self, content):
        try:
            with open(self.filename, 'w', encoding='utf-8') as f:
                f.write(content)
            return True
        except Exception as e:
            print(f"写入错误: {e}")
            return False

# 数学计算函数
import math
def quadratic_equation(a, b, c):
    discriminant = b**2 - 4*a*c
    if discriminant < 0:
        return "无实数解"
    elif discriminant == 0:
        x = -b / (2*a)
        return f"唯一解: x = {x}"
    else:
        x1 = (-b + math.sqrt(discriminant)) / (2*a)
        x2 = (-b - math.sqrt(discriminant)) / (2*a)
        return f"两个解: x1 = {x1}, x2 = {x2}"

# 字符串处理工具
def process_text(text):
    lines = text.split('\\n')
    processed_lines = []
    for line in lines:
        line = line.strip()
        if line and not line.startswith('#'):
            processed_lines.append(line)
    return '\\n'.join(processed_lines)

# 列表操作示例
def list_operations():
    numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    squares = [x**2 for x in numbers]
    even_squares = [x**2 for x in numbers if x % 2 == 0]
    return squares, even_squares

# 字典操作示例
def dict_operations():
    student = {
        "name": "张三",
        "age": 20,
        "major": "计算机科学",
        "grades": {"数学": 90, "英语": 85, "编程": 95}
    }
    return student

# 异常处理示例
def safe_division(a, b):
    try:
        result = a / b
        return result
    except ZeroDivisionError:
        return "除数不能为零"
    except TypeError:
        return "输入必须是数字"

# 装饰器示例
def timer(func):
    def wrapper(*args, **kwargs):
        import time
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()
        print(f"函数 {func.__name__} 执行时间: {end-start:.2f}秒")
        return result
    return wrapper

@timer
def expensive_operation(n):
    import time
    time.sleep(0.1)
    return sum(range(n))

# 生成器示例
def fibonacci_generator(n):
    a, b = 0, 1
    for _ in range(n):
        yield a
        a, b = b, a + b

# 上下文管理器
class DatabaseConnection:
    def __init__(self, db_name):
        self.db_name = db_name

    def __enter__(self):
        print(f"连接数据库: {self.db_name}")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        print("关闭数据库连接")

    def query(self, sql):
        print(f"执行查询: {sql}")
        return [{"id": 1, "name": "示例数据"}]

# 排序算法
def bubble_sort(arr):
    n = len(arr)
    for i in range(n):
        for j in range(0, n-i-1):
            if arr[j] > arr[j+1]:
                arr[j], arr[j+1] = arr[j+1], arr[j]
    return arr

def quick_sort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr)//2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quick_sort(left) + middle + quick_sort(right)

# 数据结构
class LinkedList:
    class Node:
        def __init__(self, data):
            self.data = data
            self.next = None

    def __init__(self):
        self.head = None

    def append(self, data):
        new_node = self.Node(data)
        if not self.head:
            self.head = new_node
            return
        current = self.head
        while current.next:
            current = current.next
        current.next = new_node

    def display(self):
        elements = []
        current = self.head
        while current:
            elements.append(current.data)
            current = current.next
        return elements

class Stack:
    def __init__(self):
        self.items = []

    def push(self, item):
        self.items.append(item)

    def pop(self):
        if not self.is_empty():
            return self.items.pop()
        return None

    def is_empty(self):
        return len(self.items) == 0

    def peek(self):
        if not self.is_empty():
            return self.items[-1]
        return None

# 主程序入口
if __name__ == "__main__":
    # 测试各种功能
    print("测试开始...")

    # 数学函数测试
    result = calculate_factorial(5)
    print(f"5的阶乘: {result}")

    # 学生类测试
    student = Student("李四", 20, "计算机科学")
    student.add_subject("Python编程")
    student.add_subject("数据结构")
    print(f"学生: {student.name}, 科目: {student.subjects}")

    # 排序测试
    test_arr = [64, 34, 25, 12, 22, 11, 90]
    sorted_arr = quick_sort(test_arr.copy())
    print(f"排序前: {test_arr}")
    print(f"排序后: {sorted_arr}")

    print("所有测试完成!")
"""
    data_sources.append(programming_data)

    # 2. Try to read external data files from the save directory.
    data_files = [
        "training_data.txt",
        "programming_data.txt",
        "code_data.txt",
        "python_code.txt"
    ]

    for data_file in data_files:
        file_path = os.path.join(FILE_CONFIG['save_dir'], data_file)
        if os.path.exists(file_path):
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read().strip()
                    # Only accept files with a meaningful amount of text.
                    if len(content) > TRAINING_DATA_CONFIG['min_text_length']:
                        data_sources.append(content)
                        print(f"✅ 加载数据文件: {data_file} ({len(content)} 字符)")
            except Exception as e:
                print(f"⚠ 读取数据文件 {data_file} 时出错: {e}")

    # 3. Fall back to the extended built-in sample when there is not enough data.
    if len(''.join(data_sources)) < 10000:  # total data below 10k characters
        extended_data = """
# 更多Python编程示例

# 网络请求示例
import requests
def fetch_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        return f"请求失败: {e}"

# 数据处理示例
import json
def process_json_data(json_string):
    try:
        data = json.loads(json_string)
        return data
    except json.JSONDecodeError as e:
        return f"JSON解析错误: {e}"

def save_to_json(data, filename):
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        return True
    except Exception as e:
        print(f"保存JSON失败: {e}")
        return False

# 日期时间处理
from datetime import datetime, timedelta
def date_operations():
    now = datetime.now()
    tomorrow = now + timedelta(days=1)
    last_week = now - timedelta(weeks=1)

    return {
        "now": now.strftime("%Y-%m-%d %H:%M:%S"),
        "tomorrow": tomorrow.strftime("%Y-%m-%d"),
        "last_week": last_week.strftime("%Y-%m-%d")
    }

# 正则表达式示例
import re
def extract_emails(text):
    pattern = r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b'
    return re.findall(pattern, text)

def validate_phone(phone):
    pattern = r'^1[3-9]\\d{9}$'
    return bool(re.match(pattern, phone))

# 多线程示例
import threading
import time

class Counter:
    def __init__(self):
        self.value = 0
        self.lock = threading.Lock()

    def increment(self):
        with self.lock:
            self.value += 1

def worker(counter, iterations):
    for _ in range(iterations):
        counter.increment()

# 单元测试示例
import unittest
class TestMathFunctions(unittest.TestCase):
    def test_factorial(self):
        self.assertEqual(calculate_factorial(5), 120)
        self.assertEqual(calculate_factorial(0), 1)

    def test_binary_search(self):
        arr = [1, 3, 5, 7, 9]
        self.assertEqual(binary_search(arr, 5), 2)
        self.assertEqual(binary_search(arr, 2), -1)

# 主程序入口
if __name__ == "__main__":
    # 测试各种功能
    print("测试开始...")

    # 数学函数测试
    result = calculate_factorial(5)
    print(f"5的阶乘: {result}")

    # 学生类测试
    student = Student("李四", 20, "计算机科学")
    student.add_subject("Python编程")
    student.add_subject("数据结构")
    print(f"学生: {student.name}, 科目: {student.subjects}")

    # 排序测试
    test_arr = [64, 34, 25, 12, 22, 11, 90]
    sorted_arr = quick_sort(test_arr.copy())
    print(f"排序前: {test_arr}")
    print(f"排序后: {sorted_arr}")

    print("所有测试完成!")
"""
        data_sources.append(extended_data)

    # Merge all data sources into one corpus.
    combined_data = '\n'.join(data_sources)

    # Repeat the data to enlarge the number of training samples.
    # NOTE(review): create_improved_sample_dataset repeats again by the same
    # factor, so the effective repetition is data_repetition squared -- confirm
    # this is intended.
    combined_data = combined_data * TRAINING_DATA_CONFIG['data_repetition']

    print(f"📊 总训练数据: {len(combined_data):,} 字符")
    return combined_data
|
|
|
+
|
|
|
+
|
|
|
+# ==================== 优化的训练函数 ====================
|
|
|
+
|
|
|
class EnhancedTrainingMonitor:
    """Tracks per-epoch losses, prints progress, and persists model snapshots."""

    def __init__(self, save_dir: str = FILE_CONFIG['save_dir']):
        self.losses = []
        self.start_time = time.time()
        self.save_dir = save_dir
        os.makedirs(save_dir, exist_ok=True)
        self.best_loss = float('inf')
        self.patience = 100              # epochs without improvement before early stop
        self.checkpoint_frequency = 50   # save a checkpoint every 50 epochs

    def update(self, loss, epoch, model=None, tokenizer=None):
        """Record one epoch's loss; print progress and save snapshots as needed."""
        self.losses.append(loss)
        elapsed = time.time() - self.start_time

        # Progress line: the first 10 epochs, every 20th epoch, and the last one.
        final_epoch = TRAINING_CONFIG['epochs'] - 1
        if epoch < 10 or epoch % 20 == 0 or epoch == final_epoch:
            speed = (epoch + 1) / elapsed
            remaining = (TRAINING_CONFIG['epochs'] - epoch - 1) / speed if speed > 0 else 0
            print(f"Epoch {epoch:4d} | Loss: {loss:.4f} | "
                  f"Speed: {speed:.2f} epoch/s | ETA: {remaining:.0f}s")

        # Periodic checkpoints (skipping epoch 0).
        if model and epoch % self.checkpoint_frequency == 0 and epoch > 0:
            self.save_checkpoint(model, tokenizer, epoch, loss)

        # Track the best loss; persist the best model only after 100 epochs.
        if loss < self.best_loss:
            self.best_loss = loss
            if model and epoch > 100:
                self.save_best_model(model, tokenizer, epoch, loss)

    def save_checkpoint(self, model, tokenizer, epoch, loss):
        """Write a resumable training checkpoint to disk."""
        path = os.path.join(self.save_dir, f"checkpoint_epoch_{epoch}.pth")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'loss': loss,
                'config': model.config,
                'tokenizer': tokenizer.stoi,
                'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S"),
            },
            path,
        )
        print(f"💾 检查点已保存: {path}")

    def save_best_model(self, model, tokenizer, epoch, loss):
        """Persist the best-so-far model together with its full training context."""
        payload = {
            'model_state_dict': model.state_dict(),
            'config': model.config,
            'tokenizer': tokenizer.stoi,
            'training_losses': self.losses,
            'epoch': epoch,
            'loss': loss,
            'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S"),
            'global_configs': {
                'MODEL_CONFIG': MODEL_CONFIG,
                'TRAINING_CONFIG': TRAINING_CONFIG,
                'GENERATION_CONFIG': GENERATION_CONFIG,
            },
        }
        model_path = os.path.join(self.save_dir, f"{FILE_CONFIG['model_prefix']}_best.pth")
        torch.save(payload, model_path)
        print(f"🏆 最佳模型已保存: {model_path} (loss: {loss:.4f})")

    def plot_loss(self):
        """Save a PNG of the loss curve; skipped when matplotlib is unavailable."""
        try:
            import matplotlib.pyplot as plt

            plt.figure(figsize=(12, 6))
            plt.plot(self.losses)
            plt.title('Training Loss Progress')
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.grid(True, alpha=0.3)
            loss_path = os.path.join(self.save_dir, 'training_loss.png')
            plt.savefig(loss_path, dpi=150, bbox_inches='tight')
            print(f"✓ 损失曲线已保存: {loss_path}")
        except ImportError:
            print("⚠ 未安装matplotlib,无法绘制损失曲线")
|
|
|
+
|
|
|
def get_improved_learning_rate(epoch, warmup_epochs=20):
    """Learning-rate schedule: linear warmup followed by cosine annealing.

    Args:
        epoch: current epoch index (0-based).
        warmup_epochs: number of linear-warmup epochs.

    Returns:
        float: learning rate for this epoch, scaled from
        TRAINING_CONFIG['learning_rate'] down toward 0 at the final epoch.
    """
    base_lr = TRAINING_CONFIG['learning_rate']
    if epoch < warmup_epochs:
        # Linear warmup from base_lr/warmup_epochs up to base_lr.
        return base_lr * (epoch + 1) / warmup_epochs

    decay_epochs = TRAINING_CONFIG['epochs'] - warmup_epochs
    if decay_epochs <= 0:
        # BUGFIX: guard the ZeroDivisionError that occurred whenever
        # warmup_epochs >= TRAINING_CONFIG['epochs'].
        return base_lr

    # Cosine annealing from base_lr to 0 over the remaining epochs.
    progress = (epoch - warmup_epochs) / decay_epochs
    return base_lr * 0.5 * (1 + math.cos(math.pi * progress))
|
|
|
+
|
|
|
+
|
|
|
def improved_train_gpt(model: OptimizedGPT, X: torch.Tensor, Y: torch.Tensor, tokenizer: CharTokenizer):
    """Train the model on (input, next-token) pairs with AdamW.

    Uses warmup + cosine learning-rate scheduling, gradient clipping,
    random-batch sampling, checkpointing via EnhancedTrainingMonitor,
    patience-based early stopping (after epoch 300), and a target-loss
    stop (after epoch 200).

    Args:
        model: the GPT model to train (moved to CUDA when available).
        X: (N, block_size) input token windows.
        Y: (N, block_size) shifted target windows.
        tokenizer: tokenizer saved alongside checkpoints.

    Returns:
        list[float]: per-epoch losses recorded by the monitor.
    """
    model.train()

    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=TRAINING_CONFIG['learning_rate'],
        weight_decay=TRAINING_CONFIG['weight_decay'],
        betas=(0.9, 0.95)
    )

    monitor = EnhancedTrainingMonitor(FILE_CONFIG['save_dir'])

    print(f"🚀 开始训练GPT模型")
    print(f"📊 总轮数: {TRAINING_CONFIG['epochs']}")
    print(f"🔢 模型参数: {sum(p.numel() for p in model.parameters()):,}")
    print(f"📚 训练样本: {len(X):,}")

    # Device setup: prefer CUDA; the full dataset is moved to the device up
    # front (assumes X/Y fit in device memory -- TODO confirm for large sets).
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"💻 使用设备: {device}")
    model.to(device)
    X, Y = X.to(device), Y.to(device)

    best_loss = float('inf')
    patience_counter = 0

    for epoch in range(TRAINING_CONFIG['epochs']):
        # Dynamic learning rate (warmup + cosine decay).
        lr = get_improved_learning_rate(epoch, TRAINING_CONFIG['warmup_epochs'])
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        # One optimization step on a randomly sampled batch.
        optimizer.zero_grad()

        indices = torch.randint(0, len(X), (TRAINING_CONFIG['batch_size'],))
        x_batch = X[indices]
        y_batch = Y[indices]

        logits, loss = model(x_batch, y_batch)
        loss.backward()

        # Gradient clipping to stabilize training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=TRAINING_CONFIG['grad_clip'])
        optimizer.step()

        monitor.update(loss.item(), epoch, model, tokenizer)

        # Early-stop bookkeeping: reset patience on any improvement.
        if loss.item() < best_loss:
            best_loss = loss.item()
            patience_counter = 0
        else:
            patience_counter += 1

        # Early-stop condition (only after 300 epochs).
        if patience_counter >= monitor.patience and epoch > 300:
            print(f"🛑 早停触发,第{epoch}轮")
            break

        # Stop early once the loss reaches the configured target.
        if loss.item() < TRAINING_CONFIG['min_loss'] and epoch > 200:
            print(f"✅ 训练完成,损失已达目标值 {loss.item():.4f}")
            break

    print("🎉 训练完成!")
    monitor.plot_loss()
    return monitor.losses
|
|
|
+
|
|
|
+
|
|
|
+# ==================== 工具函数 ====================
|
|
|
+
|
|
|
def create_improved_sample_dataset(text: str, block_size: int = None) -> Tuple[
    torch.Tensor, torch.Tensor, 'CharTokenizer']:
    """Turn raw text into (input, target) training tensors plus a tokenizer.

    Blank lines and '#' comment lines are dropped, the text is repeated for
    augmentation, and stride-1 sliding windows of ``block_size`` tokens are
    stacked into X with their one-step-shifted counterparts in Y.
    """
    block_size = block_size or TRAINING_CONFIG['block_size']

    # Pre-process: strip every line, then drop blanks and comment lines.
    stripped = [ln.strip() for ln in text.split('\n')]
    text = '\n'.join(ln for ln in stripped if ln and not ln.startswith('#'))

    # Simple augmentation by repetition.
    text = text * TRAINING_DATA_CONFIG['data_repetition']

    tokenizer = CharTokenizer(text)

    if len(text) < block_size + 1:
        # Too little text for even one window: tile it until overlapped
        # sampling has something to work with.
        print("⚠ 文本较短,使用重叠采样")
        data = torch.tensor(tokenizer.encode(text), dtype=torch.long)
        while len(data) < block_size + 1000:
            data = torch.cat([data, data])
        data = data[:block_size + 2000]
    else:
        data = torch.tensor(tokenizer.encode(text), dtype=torch.long)

    n = len(data) - block_size
    if n <= 0:
        raise ValueError("无法创建训练样本")

    # Stride-1 windows: every offset yields one (input, shifted-target) pair.
    X = torch.stack([data[start:start + block_size] for start in range(n)])
    Y = torch.stack([data[start + 1:start + block_size + 1] for start in range(n)])

    print(f"✅ 创建了 {len(X):,} 个训练样本")
    print(f"🔤 词汇表大小: {tokenizer.vocab_size}")
    return X, Y, tokenizer
|
|
|
+
|
|
|
+
|
|
|
def format_generated_text(text: str, start_text: str) -> str:
    """Trim the prompt off generated text and keep only plausible lines.

    A line survives when it looks like code (contains a Python keyword,
    starts a comment/docstring, or has '='/':' structure) or is a
    reasonably long piece of text (> 10 characters).
    """
    # Drop the prompt prefix when the output actually starts with it.
    generated = text[len(start_text):] if text.startswith(start_text) else text

    code_markers = ('def ', 'class ', 'import ', 'from ', 'if ', 'for ', 'while ')

    def _worth_keeping(line: str) -> bool:
        # Code keywords anywhere in the line.
        if any(marker in line for marker in code_markers):
            return True
        # Comments and docstring delimiters.
        if line.startswith(('#', '"""', "'''")):
            return True
        # Assignment or colon structure (covers headers ending in ':').
        if '=' in line or ':' in line:
            return True
        # Otherwise keep only reasonably long text lines.
        return len(line) > 10

    kept = [
        candidate
        for candidate in (raw.strip() for raw in generated.split('\n'))
        if candidate and _worth_keeping(candidate)
    ]
    return '\n'.join(kept)
|
|
|
+
|
|
|
+
|
|
|
def interactive_generation(model: OptimizedGPT, tokenizer: CharTokenizer):
    """Interactive generation REPL driven by the global GENERATION_CONFIG.

    Repeatedly prompts the user for a seed string, samples a continuation
    from the model, and prints a cleaned-up version of the result.
    Exits on 'quit' / 'exit' / '退出' or Ctrl-C.
    """
    print("\n" + "=" * 60)
    print("🤖 进入交互式生成模式")
    print("💡 提示: 输入Python代码片段或自然语言描述")
    print("⏹️ 退出: 输入 'quit', 'exit', 或 '退出'")
    print("🔧 使用全局生成参数:")
    print(f" 🌡️ 温度: {GENERATION_CONFIG['temperature']}")
    print(f" 🔝 Top-K: {GENERATION_CONFIG['top_k']}")
    print(f" 📏 生成长度: {GENERATION_CONFIG['max_tokens']}")
    print("=" * 60)

    model.eval()

    while True:
        try:
            print("\n" + "-" * 40)
            user_input = input("🎯 请输入起始文本: ").strip()

            if user_input.lower() in ['quit', 'exit', '退出']:
                break

            if not user_input:
                print("⚠ 输入不能为空,请重新输入。")
                continue

            print(f"⚡ 生成中...", end='', flush=True)
            start_time = time.time()

            # Encode the prompt; all sampling knobs come from the globals.
            start_tokens = torch.tensor([tokenizer.encode(user_input)], dtype=torch.long)

            with torch.no_grad():
                generated_tokens = model.generate(
                    start_tokens,
                    max_new_tokens=GENERATION_CONFIG['max_tokens'],
                    temperature=GENERATION_CONFIG['temperature'],
                    top_k=GENERATION_CONFIG['top_k'],
                    top_p=GENERATION_CONFIG['top_p'],
                    repetition_penalty=GENERATION_CONFIG['repetition_penalty']
                )

            elapsed = time.time() - start_time
            print(f"完成! (耗时: {elapsed:.2f}s)")

            # Decode the full sequence and pretty-print the continuation.
            full_text = tokenizer.decode(generated_tokens[0].tolist())
            formatted_text = format_generated_text(full_text, user_input)

            print(f"\n📊 生成结果:")
            print("=" * 50)
            print(f"🎯 起始: {user_input}")
            print("-" * 50)
            if formatted_text:
                print(formatted_text)
            else:
                # Formatting removed everything: show the raw output, truncated.
                display_text = full_text[len(user_input):]
                if len(display_text) > 300:
                    display_text = display_text[:300] + "..."
                print(display_text)
            print("=" * 50)
            print(f"📏 总长度: {len(full_text)} 字符")

        except KeyboardInterrupt:
            print("\n\n🛑 用户中断,退出交互模式")
            break
        except Exception as e:
            print(f"❌ 生成时出错: {e}")
|
|
|
+
|
|
|
+
|
|
|
def get_available_models(save_dir: str = None) -> List[Tuple[str, str]]:
    """Return (filename, full_path) pairs for every saved model, newest first.

    Scans *save_dir* (defaulting to ``FILE_CONFIG['save_dir']``) for ``.pth``
    files whose name does not contain ``checkpoint``, ordered by modification
    time descending.  Returns an empty list when the directory does not exist
    or holds no matching files.
    """
    if not save_dir:
        save_dir = FILE_CONFIG['save_dir']
    if not os.path.exists(save_dir):
        return []

    candidates = []
    for name in os.listdir(save_dir):
        if name.endswith('.pth') and 'checkpoint' not in name:
            candidates.append(name)
    if not candidates:
        return []

    def _mtime(name: str) -> float:
        return os.path.getmtime(os.path.join(save_dir, name))

    ordered = sorted(candidates, key=_mtime, reverse=True)
    return [(name, os.path.join(save_dir, name)) for name in ordered]
|
|
|
+
|
|
|
+
|
|
|
def select_model_interactively() -> str:
    """Interactively pick a saved model and return its full path.

    Lists every available model (newest first) with its modification time
    and size, then prompts until the user enters a valid 1-based index.
    An empty input selects the newest model.

    Returns:
        Full path of the chosen model file, or None when no models exist.
    """
    available_models = get_available_models()

    if not available_models:
        print("❌ 在 my_gptmodel 目录中未找到任何模型文件")
        return None

    print("\n📂 可用的模型文件:")
    print("-" * 60)
    for i, (filename, full_path) in enumerate(available_models, 1):
        mtime = os.path.getmtime(full_path)
        mtime_str = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
        size = os.path.getsize(full_path) / 1024 / 1024  # MB
        # BUG FIX: the filename was missing from the listing (a literal
        # "(unknown)" placeholder was printed instead).
        print(f"{i:2d}. {filename}")
        print(f" 修改时间: {mtime_str} | 大小: {size:.1f}MB")

    while True:
        try:
            choice = input(f"\n🎲 请选择模型文件 (1-{len(available_models)}): ").strip()
            if not choice:
                # Empty input defaults to the newest model.
                return available_models[0][1]

            index = int(choice) - 1
            if 0 <= index < len(available_models):
                return available_models[index][1]
            else:
                print(f"⚠ 请输入 1-{len(available_models)} 之间的数字")
        except ValueError:
            print("⚠ 请输入有效的数字")
|
|
|
+
|
|
|
+
|
|
|
class AdvancedGPT(OptimizedGPT):
    """Enhanced GPT that can be reconstructed from a saved checkpoint.

    Construction is inherited unchanged from ``OptimizedGPT``; the redundant
    ``__init__`` override that only forwarded to ``super().__init__`` has
    been removed.
    """

    @classmethod
    def from_pretrained(cls, model_path: str, weights_only=False):
        """Load a model (plus metadata) from a checkpoint file.

        Args:
            model_path: Path to the ``.pth`` checkpoint.
            weights_only: Forwarded to ``torch.load``.  The default of
                ``False`` is needed because the checkpoint stores a pickled
                config object — SECURITY NOTE: unpickling can execute
                arbitrary code, so only load checkpoints you trust.

        Returns:
            Tuple ``(model, tokenizer_stoi_or_None, training_losses,
            global_configs)`` as stored by the training pipeline.

        Raises:
            Exception: Re-raised (after logging) when loading fails.
        """
        try:
            checkpoint = torch.load(model_path, map_location='cpu', weights_only=weights_only)
            config = checkpoint['config']
            model = cls(config)
            model.load_state_dict(checkpoint['model_state_dict'])
            global_configs = checkpoint.get('global_configs', {})
            return model, checkpoint.get('tokenizer', None), checkpoint.get('training_losses', []), global_configs
        except Exception as e:
            print(f"❌ 加载模型时出错: {e}")
            raise
|
|
|
+
|
|
|
+
|
|
|
def load_and_test_model(model_path: str = None):
    """Load a saved model and run a few smoke-test generations.

    When *model_path* is None the user is asked to pick one via
    ``select_model_interactively``.  The first three hard-coded prompts are
    generated (100 tokens each) and printed so the user can eyeball quality.

    Args:
        model_path: Optional path to a ``.pth`` checkpoint.

    Returns:
        Tuple ``(model, tokenizer)`` on success, ``(None, None)`` on failure
        or when no model was selected.
    """
    try:
        if model_path is None:
            model_path = select_model_interactively()
            if model_path is None:
                return None, None

        print(f"📥 加载模型: {model_path}")
        model, tokenizer_dict, losses, global_configs = AdvancedGPT.from_pretrained(model_path, weights_only=False)

        # Rebuild the tokenizer from the string-to-index mapping stored
        # in the checkpoint.
        tokenizer = CharTokenizer(stoi=tokenizer_dict)
        print(f"✅ 模型加载成功")
        print(f"🔤 词汇表大小: {tokenizer.vocab_size}")

        # Smoke-test generation prompts.
        test_prompts = [
            "def calculate",
            "class Student",
            "import pandas",
            "for i in range",
            "# 单元测试",
            "def read_file"
        ]

        print(f"\n🧪 模型测试生成:")
        print("-" * 40)

        for i, prompt in enumerate(test_prompts[:3], 1):  # only test the first 3
            print(f"\n测试 {i}: '{prompt}'")
            start_tokens = torch.tensor([tokenizer.encode(prompt)], dtype=torch.long)

            with torch.no_grad():
                generated = model.generate(start_tokens, max_new_tokens=100)

            result = tokenizer.decode(generated[0].tolist())
            formatted = format_generated_text(result, prompt)
            if formatted:
                # Truncate long output for display.
                print(formatted[:200] + "..." if len(formatted) > 200 else formatted)
            else:
                print("⚠ 生成结果为空")

        return model, tokenizer

    except Exception as e:
        print(f"❌ 加载模型失败: {e}")
        import traceback
        traceback.print_exc()
        return None, None
|
|
|
+
|
|
|
+
|
|
|
def create_programming_data_file():
    """Create the programming-data text file if it does not exist yet.

    The file is seeded with the output of ``collect_training_data`` so the
    user has a template to extend with their own Python code samples.
    Write failures are reported but not raised.
    """
    target_path = os.path.join(
        FILE_CONFIG['save_dir'], FILE_CONFIG['programming_data_file'])

    if os.path.exists(target_path):
        return

    print(f"📝 创建编程数据文件: {target_path}")
    corpus = collect_training_data()

    try:
        with open(target_path, 'w', encoding='utf-8') as fh:
            fh.write(corpus.strip())
        print(f"✅ 编程数据文件已创建: {target_path}")
        print("💡 您可以将自己的Python代码数据添加到这个文件中")
    except Exception as e:
        print(f"❌ 创建编程数据文件时出错: {e}")
|
|
|
+
|
|
|
+
|
|
|
def main():
    """End-to-end pipeline: collect data, build dataset, train, save, then chat.

    Steps (in order): create the save directory, collect training data,
    build the (X, Y, tokenizer) dataset, construct an ``OptimizedGPT``,
    train it, save a timestamped checkpoint (model weights + config +
    tokenizer mapping + loss history + the global config dicts), save the
    tokenizer JSON, and finally drop into interactive generation.
    Any exception is caught, printed with a traceback, and swallowed.
    """
    try:
        # Create the output directory for models/tokenizers.
        save_dir = FILE_CONFIG['save_dir']
        os.makedirs(save_dir, exist_ok=True)

        print("🤖 GPT语言模型训练与生成系统")
        print("=" * 60)
        print(f"📁 文件保存目录: {save_dir}")
        print(f"⚙️ 模型配置: {MODEL_CONFIG}")
        print(f"⚙️ 训练配置: {TRAINING_CONFIG}")
        print(f"⚙️ 生成配置: {GENERATION_CONFIG}")

        # 1. Automatically collect training data.
        print("\n1. 📚 收集训练数据...")
        training_data = collect_training_data()

        # 2. Build the dataset.
        print("\n2. 🗂️ 创建数据集...")
        X, Y, tokenizer = create_improved_sample_dataset(training_data)
        print(f" ✅ 数据集: {len(X):,} 样本")
        print(f" 🔤 词汇表: {tokenizer.vocab_size} 字符")

        # 3. Create the model.
        print("\n3. 🧠 创建模型...")
        config = GPTConfig(vocab_size=tokenizer.vocab_size)
        print(f" {config}")

        model = OptimizedGPT(config)
        print(f" ✅ 参数数量: {sum(p.numel() for p in model.parameters()):,}")

        # 4. Train the model.
        print("\n4. 🏋️ 训练模型...")
        losses = improved_train_gpt(model, X, Y, tokenizer)

        # 5. Save the final model (timestamped so older runs are kept).
        print("\n5. 💾 保存最终模型...")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(save_dir, f"{FILE_CONFIG['model_prefix']}_final_{timestamp}.pth")
        tokenizer_path = os.path.join(save_dir, f"{FILE_CONFIG['tokenizer_prefix']}_{timestamp}.json")

        # Checkpoint layout consumed by AdvancedGPT.from_pretrained.
        torch.save({
            'model_state_dict': model.state_dict(),
            'config': config,
            'tokenizer': tokenizer.stoi,
            'training_losses': losses,
            'timestamp': timestamp,
            'global_configs': {
                'MODEL_CONFIG': MODEL_CONFIG,
                'TRAINING_CONFIG': TRAINING_CONFIG,
                'GENERATION_CONFIG': GENERATION_CONFIG
            }
        }, model_path)

        tokenizer.save(tokenizer_path)
        print(f" ✅ 模型已保存: {model_path}")
        print(f" ✅ 分词器已保存: {tokenizer_path}")

        # 6. Interactive generation.
        print("\n6. 🎮 进入交互模式...")
        interactive_generation(model, tokenizer)

    except Exception as e:
        print(f"❌ 程序执行出错: {e}")
        import traceback
        traceback.print_exc()
|
|
|
+
|
|
|
+
|
|
|
def auto_detect_and_run():
    """Entry dispatcher: load the newest saved model, or train a fresh one.

    When at least one model file exists, the most recent one is loaded and
    the interactive generation loop starts; if loading fails — or no model
    exists at all — ``main()`` is invoked to train a new model.
    """
    print("🔍 GPT语言模型自动检测系统")
    print("=" * 50)

    available_models = get_available_models()

    if not available_models:
        print("❌ 未检测到现有模型,开始训练新模型...")
        main()
        return

    print(f"📂 检测到 {len(available_models)} 个现有模型")
    print("🔄 自动加载最新模型...")

    # Models are sorted newest-first, so index 0 is the latest.
    newest_path = available_models[0][1]
    model, tokenizer = load_and_test_model(newest_path)

    if model and tokenizer:
        print("\n✅ 模型加载成功,进入交互式生成模式")
        interactive_generation(model, tokenizer)
    else:
        print("❌ 模型加载失败,开始训练新模型...")
        main()
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Auto-detect and run: load an existing model if one exists,
    # otherwise train a new one.
    auto_detect_and_run()
|