Mo-alaa committed on
Commit
29a6b30
·
1 Parent(s): ac462f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -15
app.py CHANGED
@@ -1,21 +1,33 @@
1
  from transformers import AutoModelForCausalLM, AutoTokenizer
2
  import streamlit as st
3
- device = "cuda" # the device to load the model onto
 
 
 
 
 
 
 
 
 
4
 
5
- model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
6
- tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
7
 
8
- messages = [
9
- {"role": "user", "content": "What is your favourite condiment?"},
10
- {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
11
- {"role": "user", "content": "Do you have mayonnaise recipes?"}
12
- ]
13
 
14
- encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- model_inputs = encodeds.to(device)
17
- model.to(device)
18
-
19
- generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
20
- decoded = tokenizer.batch_decode(generated_ids)
21
- st.write(decoded[0])
 
"""Streamlit app that generates short content snippets from a seed phrase.

The script loads the ``salesken/content_generation_from_phrases`` model,
samples a batch of continuations for a fixed seed phrase, strips the prompt
and trailing markers from every sample, and shows the unique results.
"""
import streamlit as st
import torch
# NOTE(review): AutoModelWithLMHead is deprecated upstream in favour of
# AutoModelForCausalLM and is no longer used below; the import is retained
# only because removing file-level imports is outside this change.
from transformers import AutoModelForCausalLM, AutoModelWithLMHead, AutoTokenizer

MODEL_NAME = "salesken/content_generation_from_phrases"
# Marker placed between the seed phrase and the generated text in the prompt.
PROMPT_SEPARATOR = " ~~ "
# Marker after which the remainder of a generated sample is discarded.
CONTENT_TERMINATOR = "||"


def extract_content(decoded):
    """Return the generated text of one decoded sample, or ``None``.

    The text before the first ``||`` is kept, then the segment that follows
    the first ``' ~~ '`` separator is returned.

    Args:
        decoded: One decoded model output (prompt plus continuation).

    Returns:
        The extracted segment, or ``None`` when the sample contains no
        ``' ~~ '`` separator (previously this case raised ``IndexError``).
    """
    body = decoded.split(CONTENT_TERMINATOR, 1)[0]
    parts = body.split(PROMPT_SEPARATOR)
    if len(parts) < 2:
        return None
    return parts[1]


# Always hold a torch.device, whichever backend is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)

input_query = ["data science beginner"]
query = "<|startoftext|> " + input_query[0] + " ~~"

input_ids = tokenizer.encode(query.lower(), return_tensors="pt").to(device)
sample_outputs = model.generate(
    input_ids,
    do_sample=True,
    num_beams=1,
    max_length=256,
    temperature=0.9,
    top_k=30,
    num_return_sequences=100,
)

decoded_outputs = tokenizer.batch_decode(sample_outputs, skip_special_tokens=True)
extracted = (extract_content(text) for text in decoded_outputs)
# dict.fromkeys drops duplicates while preserving first-seen order.
content = list(dict.fromkeys(text for text in extracted if text is not None))

st.write(content)