omkar56 committed on
Commit
f158972
·
1 Parent(s): 7050f20

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +58 -99
main.py CHANGED
@@ -1,111 +1,70 @@
1
- # import os
2
- # from fastapi import FastAPI, File, Request, UploadFile, Body, Depends, HTTPException
3
- # from fastapi.security.api_key import APIKeyHeader
4
- # from typing import Optional, Annotated
5
- # from fastapi.encoders import jsonable_encoder
6
- # from PIL import Image
7
- # import io
8
- # import cv2
9
- # import numpy as np
10
- # import pytesseract
11
- # from nltk.tokenize import sent_tokenize
12
- # from transformers import MarianMTModel, MarianTokenizer
13
 
14
- # API_KEY = os.environ.get("API_KEY")
15
 
16
- # app = FastAPI()
17
- # api_key_header = APIKeyHeader(name="api_key", auto_error=False)
18
 
19
- # def get_api_key(api_key: Optional[str] = Depends(api_key_header)):
20
- # if api_key is None or api_key != API_KEY:
21
- # raise HTTPException(status_code=401, detail="Unauthorized access")
22
- # return api_key
23
 
24
- # @app.post("/api/ocr", response_model=dict)
25
- # async def ocr(
26
- # api_key: str = Depends(get_api_key),
27
- # image: UploadFile = File(...),
28
- # # languages: list = Body(["eng"])
29
- # ):
30
- # try:
31
- # print("[1]",os.popen(f'cat /etc/debian_version').read())
32
- # print("[2]",os.popen(f'cat /etc/issue').read())
33
- # print("[3]",os.popen(f'apt search tesseract').read())
34
- # # content = await image.read()
35
- # # image = Image.open(BytesIO(content))
36
- # image_stream = io.BytesIO(image)
37
- # image_stream.seek(0)
38
- # file_bytes = np.asarray(bytearray(image_stream.read()), dtype=np.uint8)
39
- # frame = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
40
- # # label = read_img(frame)
41
- # print("[image]",frame)
42
- # if hasattr(pytesseract, "image_to_string"):
43
- # print("Image to string function is available")
44
- # else:
45
- # print("Image to string function is not available")
46
- # # text = pytesseract.image_to_string(image, lang="+".join(languages))
47
- # # text = pytesseract.image_to_string(image, lang = 'eng')
48
- # except Exception as e:
49
- # return {"error": str(e)}, 500
50
 
51
- # # return jsonable_encoder({"text": text})
52
- # return {"ImageText": "text"}
53
 
54
- # @app.post("/api/translate", response_model=dict)
55
- # async def translate(
56
- # api_key: str = Depends(get_api_key),
57
- # text: str = Body(...),
58
- # src: str = "en",
59
- # trg: str = "zh",
60
- # ):
61
- # if api_key != API_KEY:
62
- # return {"error": "Invalid API key"}, 401
63
 
64
- # tokenizer, model = get_model(src, trg)
65
 
66
- # translated_text = ""
67
- # for sentence in sent_tokenize(text):
68
- # translated_sub = model.generate(**tokenizer(sentence, return_tensors="pt"))[0]
69
- # translated_text += tokenizer.decode(translated_sub, skip_special_tokens=True) + "\n"
70
 
71
- # return jsonable_encoder({"translated_text": translated_text})
72
 
73
- # def get_model(src: str, trg: str):
74
- # model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
75
- # tokenizer = MarianTokenizer.from_pretrained(model_name)
76
- # model = MarianMTModel.from_pretrained(model_name)
77
- # return tokenizer, model
78
 
79
 
80
- import numpy as np
81
- import sys, os
82
- from fastapi import FastAPI, UploadFile, File
83
- from starlette.requests import Request
84
- import io
85
- import cv2
86
- import pytesseract
87
- from pydantic import BaseModel
88
def read_img(img):
    """Return the text recognised in *img*.

    OCR is stubbed out in this version: the argument is only logged and the
    same placeholder string is returned for every input.

    Args:
        img: The decoded image handed over by the caller; it is printed and
            otherwise unused.

    Returns:
        The placeholder string ``'bhooo'``.
    """
    print("[img]", img)
    # TODO: the real call, pytesseract.image_to_string(img), was left
    # disabled by the author; the placeholder below stands in for it.
    return 'bhooo'
93
-
94
app = FastAPI()


class ImageType(BaseModel):
    """Request schema holding a single image URL.

    NOTE(review): no endpoint visible in this file references this model.
    """

    # Location of the image as a string.
    url: str
97
@app.post("/api/ocr", response_model=dict)
async def ocr(request: Request,
              file: bytes = File(...)
              ):
    """Decode an uploaded image and return the text read from it.

    Args:
        request: The incoming request (unused; kept for interface stability).
        file: Raw bytes of the uploaded image.

    Returns:
        ``{"ImageText": <text>}`` where the text comes from ``read_img``.

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded as
            an image, 500 when decoding fails unexpectedly.
    """
    # Imported locally because the module-level fastapi import does not
    # include HTTPException.
    from fastapi import HTTPException

    if not file:
        raise HTTPException(status_code=400, detail="Empty upload")

    try:
        # cv2.imdecode expects the encoded image as a flat uint8 buffer.
        file_bytes = np.frombuffer(file, dtype=np.uint8)
        frame = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
    except Exception as e:
        # A (dict, 500) tuple cannot satisfy response_model=dict, so report
        # the failure through the framework's error mechanism instead.
        raise HTTPException(status_code=500, detail=str(e)) from e

    # imdecode signals undecodable data by returning None, not by raising.
    if frame is None:
        raise HTTPException(status_code=400, detail="Could not decode image")

    label = read_img(frame)
    print("[label]", label)
    return {"ImageText": label}
 
1
+ import os
2
+ from fastapi import FastAPI, File, Request, UploadFile, Body, Depends, HTTPException
3
+ from fastapi.security.api_key import APIKeyHeader
4
+ from typing import Optional, Annotated
5
+ from fastapi.encoders import jsonable_encoder
6
+ from PIL import Image
7
+ import io
8
+ import cv2
9
+ import numpy as np
10
+ import pytesseract
11
+ from nltk.tokenize import sent_tokenize
12
+ from transformers import MarianMTModel, MarianTokenizer
13
 
14
# Shared secret callers must present; None when the API_KEY environment
# variable is not set.
API_KEY = os.environ.get("API_KEY")

app = FastAPI()

# auto_error=False: a missing "api_key" header is delivered as None rather
# than rejected automatically, so get_api_key decides how to respond.
api_key_header = APIKeyHeader(name="api_key", auto_error=False)
18
 
19
def get_api_key(api_key: Optional[str] = Depends(api_key_header)):
    """Validate the ``api_key`` request header against the configured key.

    Args:
        api_key: Value of the ``api_key`` header, or ``None`` when the
            header is absent.

    Returns:
        The supplied key, once it has been verified.

    Raises:
        HTTPException: 401 when no key is configured, no key was supplied,
            or the supplied key does not match.
    """
    import secrets  # local import: only needed for the constant-time compare

    # Fail closed: an unset or empty configured key must never authorise
    # anyone, and a missing header is always rejected.
    if not API_KEY or api_key is None:
        raise HTTPException(status_code=401, detail="Unauthorized access")

    # compare_digest avoids leaking key contents through comparison timing.
    # Encoding to bytes keeps non-ASCII header values from raising TypeError.
    supplied = api_key.encode("utf-8")
    expected = API_KEY.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized access")

    return api_key
23
 
24
@app.post("/api/ocr", response_model=dict)
async def ocr(
    api_key: str = Depends(get_api_key),
    image: UploadFile = File(...),
):
    """Accept an uploaded image and return the text extracted from it.

    Args:
        api_key: Verified API key injected by ``get_api_key``.
        image: The uploaded image file.

    Returns:
        ``{"ImageText": <text>}``. Text extraction is not wired up yet, so
        the value is currently the placeholder string ``"text"``.

    Raises:
        HTTPException: 400 when the upload cannot be opened as an image.
    """
    content = await image.read()

    try:
        # BytesIO lives in the io module; the bare name is not imported.
        decoded = Image.open(io.BytesIO(content))
    except Exception as exc:
        # Pillow raises several unrelated exception types for bad input.
        # Raise instead of returning a (dict, 500) tuple, which cannot
        # satisfy response_model=dict.
        raise HTTPException(
            status_code=400, detail=f"Could not read image: {exc}"
        ) from exc

    print("[image]", decoded)

    # TODO: replace the placeholder with the real OCR call, e.g.
    # pytesseract.image_to_string(decoded, lang="eng"), optionally taking the
    # languages from the request.
    return {"ImageText": "text"}
 
44
 
45
@app.post("/api/translate", response_model=dict)
async def translate(
    api_key: str = Depends(get_api_key),
    text: str = Body(...),
    src: str = "en",
    trg: str = "zh",
):
    """Translate *text* sentence by sentence from *src* to *trg*.

    Args:
        api_key: Verified API key injected by ``get_api_key``.
        text: The text to translate, taken from the request body.
        src: Source language code used to pick the model (default ``"en"``).
        trg: Target language code used to pick the model (default ``"zh"``).

    Returns:
        ``{"translated_text": <str>}`` where each translated sentence is
        followed by a newline.
    """
    # No second key check here: get_api_key has already raised 401 for a
    # missing or wrong key, so api_key is known to be valid at this point.
    tokenizer, model = get_model(src, trg)

    translated_sentences = []
    for sentence in sent_tokenize(text):
        encoded = tokenizer(sentence, return_tensors="pt")
        # generate() returns one sequence per input; a single sentence was
        # passed, so take the first.
        output_ids = model.generate(**encoded)[0]
        translated_sentences.append(
            tokenizer.decode(output_ids, skip_special_tokens=True)
        )

    # Join once instead of growing a string inside the loop; every sentence
    # keeps its trailing newline, matching the previous output format.
    translated_text = "".join(f"{line}\n" for line in translated_sentences)
    return {"translated_text": translated_text}
63
 
64
# Recently loaded (tokenizer, model) pairs keyed by model name, so repeated
# requests for the same language pair do not reload the weights each time.
_MODEL_CACHE = {}
# Upper bound on cached pairs; src/trg come from the request, so the cache
# must not be allowed to grow without limit.
_MODEL_CACHE_MAX = 4


def get_model(src: str, trg: str):
    """Return the Marian tokenizer and model for the *src* -> *trg* pair.

    Args:
        src: Source language code, inserted into the model name.
        trg: Target language code, inserted into the model name.

    Returns:
        A ``(tokenizer, model)`` tuple for
        ``Helsinki-NLP/opus-mt-{src}-{trg}``.
    """
    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"

    cached = _MODEL_CACHE.get(model_name)
    if cached is None:
        # Load first and store only on success, so a failed load is never
        # cached.
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        if len(_MODEL_CACHE) >= _MODEL_CACHE_MAX:
            # dicts preserve insertion order: the first key is the oldest.
            del _MODEL_CACHE[next(iter(_MODEL_CACHE))]
        cached = (tokenizer, model)
        _MODEL_CACHE[model_name] = cached

    return cached
69
 
70