roshnn24 committed on
Commit f50f6c8 · verified · 1 Parent(s): aaf6088

Update app.py

Files changed (1):
  1. app.py +74 -45
app.py CHANGED
@@ -19,60 +19,89 @@ from huggingface_hub import login
 
 app = Flask(__name__)
 
+# Configuration for Hugging Face Spaces
 PORT = int(os.environ.get("PORT", 7860))
 
-hf_token = os.environ.get("HF_TOKEN")
-if hf_token:
-    login(hf_token)
-
-UPLOAD_FOLDER = '/tmp/uploads' # Change to tmp directory for Spaces
+# Set cache directories to /tmp
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
+os.environ['HF_HOME'] = '/tmp/hf_home'
+os.environ['XDG_CACHE_HOME'] = '/tmp/cache'
+os.environ['HF_DATASETS_CACHE'] = '/tmp/datasets_cache'
+
+# Create necessary directories with proper permissions
+for directory in [
+    '/tmp/transformers_cache',
+    '/tmp/hf_home',
+    '/tmp/cache',
+    '/tmp/datasets_cache',
+    '/tmp/uploads'
+]:
+    os.makedirs(directory, exist_ok=True)
+
+# Configure upload folder inside the space
+UPLOAD_FOLDER = '/tmp/uploads'
 ALLOWED_EXTENSIONS = {'py'}
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
 # Database configuration
 DATABASE_PATH = '/tmp/chat_database.db'
-CACHE_DIR = "/tmp/huggingface_cache"
-MODEL_CACHE_DIR = "/tmp/model_cache"
-os.makedirs(CACHE_DIR, exist_ok=True)
-os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
-os.environ['TRANSFORMERS_CACHE'] = CACHE_DIR
-os.environ['HF_HOME'] = CACHE_DIR
-os.environ['HF_DATASETS_CACHE'] = CACHE_DIR
-
-# Initialize LangChain with Ollama LLM
-if hf_token:
-    model_name = "mistralai/Mistral-7B-Instruct-v0.1"
-else:
-    # Fallback to a free, smaller model
-    model_name = "microsoft/phi-4"
-
-try:
-    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=CACHE_DIR)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        load_in_8bit=True,
-        cache_dir=MODEL_CACHE_DIR
-    )
 
-    # Create pipeline
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=512,
-        temperature=0.7,
-        top_p=0.95,
-        repetition_penalty=1.15
-    )
+def get_model_name():
+    """Determine which model to use based on token availability"""
+    try:
+        hf_token = os.environ.get("HF_TOKEN")
+        if hf_token:
+            # Set token in environment and return gated model name
+            os.environ['HUGGING_FACE_HUB_TOKEN'] = hf_token
+            return "mistralai/Mistral-7B-Instruct-v0.1"
+        else:
+            # Return free model if no token
+            return "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+    except Exception as e:
+        print(f"Error accessing token: {e}")
+        return "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
-    # Initialize LangChain with HuggingFacePipeline
-    llm = HuggingFacePipeline(pipeline=pipe)
+def initialize_model():
+    """Initialize the model with appropriate settings"""
+    try:
+        model_name = get_model_name()
+        print(f"Initializing model: {model_name}")
+
+        # Initialize tokenizer with explicit cache directory
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            cache_dir='/tmp/transformers_cache',
+            token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
+        )
+
+        # Initialize model with explicit cache directory
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            cache_dir='/tmp/transformers_cache',
+            token=os.environ.get('HUGGING_FACE_HUB_TOKEN'),
+            torch_dtype=torch.float16,
+            device_map="auto",
+            load_in_8bit=True
+        )
+
+        # Create pipeline
+        pipe = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=512,
+            temperature=0.7,
+            top_p=0.95,
+            repetition_penalty=1.15
+        )
+
+        return HuggingFacePipeline(pipeline=pipe)
+    except Exception as e:
+        print(f"Error initializing model: {e}")
+        raise
 
-except Exception as e:
-    print(f"Error loading model: {e}")
-    raise
+# Initialize LLM
+llm = initialize_model()
 
 @contextmanager
 def get_db_connection():
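
A note on the cache setup added above: newer transformers releases warn that TRANSFORMERS_CACHE is deprecated and resolve all caches under HF_HOME, so on current versions the four environment variables could likely collapse into one. A minimal sketch under that assumption (version-dependent; the commit's explicit per-cache variables still work, just more verbosely):

# Sketch: assumes a recent transformers/huggingface_hub, where HF_HOME
# is the single base for hub, transformers, and datasets caches and
# TRANSFORMERS_CACHE only emits a deprecation warning.
import os

os.environ['HF_HOME'] = '/tmp/hf_home'
os.makedirs('/tmp/hf_home', exist_ok=True)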
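Similarly, recent transformers versions route 8-bit loading through a BitsAndBytesConfig object rather than the bare load_in_8bit=True keyword used in initialize_model() (either spelling needs the bitsandbytes package and a CUDA device). A sketch of the equivalent call, reusing the commit's fallback model and cache path:

# Sketch: 8-bit quantization via BitsAndBytesConfig on newer transformers;
# behavior is otherwise the same as the from_pretrained call in the diff.
import os
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",   # the commit's no-token fallback
    cache_dir='/tmp/transformers_cache',
    token=os.environ.get('HUGGING_FACE_HUB_TOKEN'),
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # replaces load_in_8bit=True
)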
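For context on how the resulting llm object gets used: HuggingFacePipeline behaves as a standard LangChain LLM, so downstream code can prompt it directly or compose it into a chain. A hypothetical invocation (the actual call sites are elsewhere in app.py and not part of this hunk; the template text is made up for illustration):

# Sketch: prompting the wrapped pipeline through LangChain.
from langchain.prompts import PromptTemplate

prompt = PromptTemplate.from_template(
    "Review this Python code and list any bugs:\n\n{code}"
)
chain = prompt | llm   # runnable composition on recent LangChain versions
print(chain.invoke({"code": "def add(a, b): return a - b"}))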