File size: 1,181 Bytes
f41f56e
ac462f6
29a6b30
 
 
 
 
 
 
 
 
 
f41f56e
 
29a6b30
 
f41f56e
29a6b30
 
 
 
 
 
 
 
 
 
 
 
 
 
f41f56e
29a6b30
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from transformers import AutoModelForCausalLM, AutoTokenizer
import streamlit as st
from transformers import AutoTokenizer, AutoModelWithLMHead  
import torch
# Run on the GPU when CUDA is available, otherwise on the CPU. `device` is
# always a `torch.device` (never a bare string), so later code can rely on
# attributes such as `device.type` regardless of which branch was taken.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
# Hugging Face Hub checkpoint used for both the tokenizer and the model.
MODEL_NAME = "salesken/content_generation_from_phrases"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# `AutoModelWithLMHead` is deprecated in transformers; `AutoModelForCausalLM`
# (already imported at the top of this file) is the documented replacement
# for models used with left-to-right `generate()`.
# NOTE(review): assumes this checkpoint is a causal (GPT-2 style) LM - confirm
# against the model card.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)


# Seed phrase for generation; only the first list entry is used.
input_query = ["data science beginner"]

# Prompt layout: start marker, the phrase, then the " ~~" marker.
query = "".join(["<|startoftext|> ", input_query[0], " ~~"])

# Tokenize the lower-cased prompt and move it to the model's device.
encoded_prompt = tokenizer.encode(query.lower(), return_tensors='pt')
input_ids = encoded_prompt.to(device)

# Sampling configuration: stochastic decoding (no beam search), top-k
# filtering, and 100 candidate sequences of at most 256 tokens each.
sampling_options = dict(
    do_sample=True,
    num_beams=1,
    max_length=256,
    temperature=0.9,
    top_k=30,
    num_return_sequences=100,
)
sample_outputs = model.generate(input_ids, **sampling_options)
def _extract_content(decoded):
    """Return the generated text from one decoded sample, or None.

    The text before the first '||' is kept and split on ' ~~ '; the segment
    right after the first ' ~~ ' is the result. Returns None when ' ~~ ' is
    absent (e.g. generation stopped right after the prompt), instead of
    raising IndexError.
    """
    head = decoded.split('||')[0]
    segments = head.split(' ~~ ')
    if len(segments) < 2:
        return None
    return segments[1]


# Decode every sampled sequence and pull out its generated segment.
_candidates = (
    _extract_content(tokenizer.decode(sequence, skip_special_tokens=True))
    for sequence in sample_outputs
)
# dict.fromkeys drops duplicates while keeping first-seen order; samples
# without a usable segment (None) are skipped.
content = list(
    dict.fromkeys(text for text in _candidates if text is not None)
)

st.write(content)