  - isExpanded: false
    sections:
    - local: tasks/idefics
      title: Image tasks with IDEFICS
    - local: tasks/prompting
      title: LLM prompting guide
    title: Prompting
  title: Task Guides
- sections:
  - local: fast_tokenizers
    title: Use fast tokenizers from 🤗 Tokenizers
  - local: multilingual
    title: Run inference with multilingual models
  - local: create_a_model
    title: Use model-specific APIs
  - local: custom_models
    title: Share a custom model
  - local: chat_templating
    title: Templates for chat models
  - local: trainer
    title: Trainer
  - local: sagemaker
    title: Run training on Amazon SageMaker
  - local: serialization
    title: Export to ONNX
  - local: tflite
    title: Export to TFLite
  - local: torchscript
    title: Export to TorchScript
  - local: benchmarks
    title: Benchmarks
  - local: notebooks
    title: Notebooks with examples
  - local: community
    title: Community resources
  - local: troubleshooting
    title: Troubleshoot
  - local: gguf
    title: Interoperability with GGUF files
  title: Developer guides
- sections:
  - local: quantization/overview
    title: Getting started
  - local: quantization/bitsandbytes
    title: bitsandbytes
  - local: quantization/gptq
    title: GPTQ
  - local: quantization/awq
    title: AWQ
  - local: quantization/aqlm
    title: AQLM
  - local: quantization/quanto
    title: Quanto
  - local: quantization/eetq
    title: EETQ
  - local: quantization/hqq
    title: HQQ
  - local: quantization/optimum
    title: Optimum
  - local: quantization/contribute
    title: Contribute new quantization method
  title: Quantization Methods
- sections:
  - local: performance
    title: Overview
  - local: llm_optims
    title: LLM inference optimization
  - sections:
    - local: perf_train_gpu_one
      title: Methods and tools for efficient training on a single GPU
    - local: perf_train_gpu_many
      title: Multiple GPUs and parallelism
    - local: fsdp
      title: Fully Sharded Data Parallel
    - local: deepspeed
      title: DeepSpeed
    - local: perf_train_cpu
      title: Efficient training on CPU
    - local: perf_train_cpu_many
      title: Distributed CPU training
    - local: perf_train_tpu_tf
      title: Training on TPU with TensorFlow
    - local: perf_train_special
      title: PyTorch training on Apple silicon
    - local: perf_hardware
      title: Custom hardware for training
    - local: hpo_train
      title: Hyperparameter Search using Trainer API
    title: Efficient training techniques
  - sections:
    - local: perf_infer_cpu
      title: CPU inference
    - local: perf_infer_gpu_one
      title: GPU inference
    title: Optimizing inference