Eric Botti committed on
Commit
e5d260a
·
1 Parent(s): 8aa24e3

created streamlit interface

Browse files
Files changed (3) hide show
  1. app.py +24 -0
  2. main.py +50 -40
  3. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # standard
2
+ from io import StringIO
3
+ # 3rd party
4
+ import streamlit as st
5
+ # local
6
+ import main
7
+
8
+ st.set_page_config(page_title='Transcript Notetaker', page_icon=':memo:', layout='wide')
9
+
10
+ st.write("Hello World")
11
+
12
+ upload = st.file_uploader("Transcript", type='.txt')
13
+
14
+ take_notes = st.button("Create Notes")
15
+
16
+ if take_notes and upload:
17
+ upload_stringio = StringIO(upload.getvalue().decode('UTF-8'))
18
+
19
+ notes = main.create_meeting_notes(upload_stringio)
20
+
21
+ if notes:
22
+ st.download_button("Download Notes", notes, "notes.md")
23
+
24
+ st.markdown(notes)
main.py CHANGED
@@ -5,6 +5,7 @@ import time
5
  import re
6
  # 3rd party
7
  from langchain.llms import OpenAI
 
8
  from langchain import LLMChain
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
  from langchain import PromptTemplate
@@ -13,13 +14,31 @@ from langchain import PromptTemplate
13
  config = configparser.ConfigParser()
14
  config.read('config.ini')
15
 
16
-
17
- def load_transcript(path: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # Google Meet Transcripts have a header which we don't want to be summarized
19
  header_lines = 5
20
 
21
- with open(path, 'r') as input_file:
22
- file_text = input_file.readlines()
23
 
24
  head = file_text[:header_lines]
25
  transcript = "".join(file_text[header_lines:])
@@ -27,35 +46,21 @@ def load_transcript(path: str):
27
  return head, transcript
28
 
29
 
30
- if __name__ == '__main__':
31
  # read config variables
32
- if not os.getenv("OPENAI_API_KEY"):
33
- os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']
34
- transcript_filepath = config['OPTIONAL']['transcript-filepath']
35
- notes_filepath = config['OPTIONAL']['notes-filepath']
36
-
37
- llm = OpenAI(temperature=0)
38
 
39
- head, transcript = load_transcript(transcript_filepath)
40
 
41
  # split the transcript on the 5-min timestamps
42
- regex_pattern = r"[0-9]{2}:[0-9]{2}:[0-9]{2}"
43
  five_min_chunks = re.split(regex_pattern, transcript)
44
 
45
  # create a textsplitter to subdivide those chunks into appropriately sized chunks.
46
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
47
-
48
- # prompt
49
- prompt = PromptTemplate(
50
- template="Write a concise summary of the following: {transcript}",
51
- input_variables=['transcript']
52
- )
53
-
54
- chain = LLMChain(
55
- prompt=prompt,
56
- llm=llm,
57
- verbose=False
58
- )
59
 
60
  # list the meeting time and the chunks associated with it
61
  timestamped_summaries = []
@@ -68,7 +73,7 @@ if __name__ == '__main__':
68
  sub_chunks = text_splitter.split_text(five_minutes_chunk)
69
 
70
  summaries = []
71
- for j, chunk in enumerate(sub_chunks):
72
  summaries.append(chain.run(chunk))
73
  print(f"{timestamp}: Chunk {j}/{len(sub_chunks)}")
74
 
@@ -80,16 +85,21 @@ if __name__ == '__main__':
80
 
81
  first_line = re.split(r"[()]", head[0])
82
 
83
- # Write summaries to file
84
- with open(notes_filepath, 'w+') as f:
85
- f.write(f"# {first_line[0]}\n")
86
- f.write(f"{first_line[1]}\n")
87
- f.write("## Attendees\n")
88
- f.write(f"{head[2]}\n")
89
- f.write('## Meeting Notes\n')
90
- for timestamp, summaries in timestamped_summaries:
91
- f.write(f"### {timestamp}\n")
92
- for summary in summaries:
93
- f.write(f"- {summary.strip()}\n")
94
-
95
- print(f"Export to file {notes_filepath} completed")
 
 
 
 
 
 
5
  import re
6
  # 3rd party
7
  from langchain.llms import OpenAI
8
+ from langchain.chat_models import ChatOpenAI
9
  from langchain import LLMChain
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain import PromptTemplate
 
14
  config = configparser.ConfigParser()
15
  config.read('config.ini')
16
 
17
+ # read config variables
18
+ if not os.getenv("OPENAI_API_KEY"):
19
+ os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']
20
+
21
+ # LangChain Config
22
+ # llm
23
+ llm = OpenAI(temperature=0)
24
+ # prompt
25
+ prompt = PromptTemplate(
26
+ template="Write a concise summary of the following: {transcript}",
27
+ input_variables=['transcript']
28
+ )
29
+ # chain
30
+ chain = LLMChain(
31
+ prompt=prompt,
32
+ llm=llm,
33
+ verbose=False
34
+ )
35
+
36
+
37
+ def load_transcript(input_file):
38
  # Google Meet Transcripts have a header which we don't want to be summarized
39
  header_lines = 5
40
 
41
+ file_text = input_file.readlines()
 
42
 
43
  head = file_text[:header_lines]
44
  transcript = "".join(file_text[header_lines:])
 
46
  return head, transcript
47
 
48
 
49
+ def create_meeting_notes(transcript_file):
50
  # read config variables
51
+ # if not os.getenv("OPENAI_API_KEY"):
52
+ # os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']
53
+ # transcript_filepath = config['OPTIONAL']['transcript-filepath']
54
+ # notes_filepath = config['OPTIONAL']['notes-filepath']
 
 
55
 
56
+ head, transcript = load_transcript(transcript_file)
57
 
58
  # split the transcript on the 5-min timestamps
59
+ regex_pattern = r"[0-9]{2}:[0-9]{2}:0{2}"
60
  five_min_chunks = re.split(regex_pattern, transcript)
61
 
62
  # create a textsplitter to subdivide those chunks into appropriately sized chunks.
63
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # list the meeting time and the chunks associated with it
66
  timestamped_summaries = []
 
73
  sub_chunks = text_splitter.split_text(five_minutes_chunk)
74
 
75
  summaries = []
76
+ for j, chunk in enumerate(sub_chunks, 1):
77
  summaries.append(chain.run(chunk))
78
  print(f"{timestamp}: Chunk {j}/{len(sub_chunks)}")
79
 
 
85
 
86
  first_line = re.split(r"[()]", head[0])
87
 
88
+ # Transcript Notes
89
+ meeting_notes = f'''# {first_line[0]}
90
+ {first_line[1]}
91
+ ## Attendees
92
+ {head[2]}## Meeting Notes
93
+ '''
94
+ for timestamp, summaries in timestamped_summaries:
95
+ meeting_notes += f'### {timestamp}\n'
96
+ for summary in summaries:
97
+ meeting_notes += f"- {summary.strip()}\n"
98
+ meeting_notes += "\nEnd of Meeting"
99
+
100
+ return meeting_notes
101
+
102
+ # with open(notes_filepath, 'w+') as f:
103
+ # f.write(meeting_notes)
104
+
105
+ # print(f"Export to file {notes_filepath} completed")
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ