Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,10 +34,10 @@ optimal_threads = max(4, cpu_count - 1) # Leave one core free
|
|
34 |
print(f"Using {optimal_threads} of {cpu_count} CPU cores")
|
35 |
|
36 |
# Download model files
|
37 |
-
def get_model_path(repo_id, filename):
|
38 |
print(f"Obtaining {filename}...")
|
39 |
# Download to our custom cache location
|
40 |
-
return hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=MODEL_CACHE)
|
41 |
|
42 |
# Function to quantize model to int4 or int8
|
43 |
def quantize_model(input_model_path, output_model_path, quantization_type="q4_0"):
|
@@ -68,12 +68,10 @@ def quantize_model(input_model_path, output_model_path, quantization_type="q4_0"
|
|
68 |
|
69 |
# Download models
|
70 |
base_model_path = get_model_path(
|
71 |
-
"johnpaulbin/articulate-11-expspanish-base-merged",
|
72 |
-
"articulate-11-expspanish-base-merged-q8_0.gguf"
|
73 |
)
|
74 |
adapter_path = get_model_path(
|
75 |
-
"johnpaulbin/articulate-V1",
|
76 |
-
"articulate-V1-q8_0.gguf"
|
77 |
)
|
78 |
|
79 |
# Quantize models (creates int4 versions for faster CPU inference)
|
|
|
34 |
print(f"Using {optimal_threads} of {cpu_count} CPU cores")
|
35 |
|
36 |
# Download model files
|
37 |
+
def get_model_path(repo_id):
|
38 |
print(f"Obtaining {filename}...")
|
39 |
# Download to our custom cache location
|
40 |
+
return hf_hub_download(repo_id=repo_id, cache_dir=MODEL_CACHE)
|
41 |
|
42 |
# Function to quantize model to int4 or int8
|
43 |
def quantize_model(input_model_path, output_model_path, quantization_type="q4_0"):
|
|
|
68 |
|
69 |
# Download models
|
70 |
base_model_path = get_model_path(
|
71 |
+
"johnpaulbin/articulate-11-expspanish-base-merged"
|
|
|
72 |
)
|
73 |
adapter_path = get_model_path(
|
74 |
+
"johnpaulbin/articulate-V1"
|
|
|
75 |
)
|
76 |
|
77 |
# Quantize models (creates int4 versions for faster CPU inference)
|