Sumit Kumar committed on
Commit
2937f6c
·
1 Parent(s): 1175fd3

Add captcha resolution functionality and update requirements

Browse files

- Implement `resolve_captcha` function in `captcha.py` to decode images and extract text using a pre-trained model.
- Add new endpoint `/resolve_captcha` in `app.py` for handling captcha resolution requests.
- Update `requirements.txt` to include necessary dependencies for image processing and model inference.
- Create `.gitignore` file to exclude `__pycache__` from version control.

Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +21 -1
  3. captcha.py +38 -0
  4. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
app.py CHANGED
@@ -1,7 +1,14 @@
1
  from fastapi import FastAPI
 
 
2
 
3
  app = FastAPI()
4
 
 
 
 
 
 
5
  @app.get("/")
6
  def greet_json():
7
  return {"Hello": "World!"}
@@ -9,4 +16,17 @@ def greet_json():
9
 
10
  @app.get("/greet/{name}")
11
  def greet_name(name: str):
12
- return {"Hello": name}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
2
+ from captcha import resolve_captcha
3
+ from pydantic import BaseModel
4
 
5
  app = FastAPI()
6
 
7
+
8
class Item(BaseModel):
    """Request body accepted by the ``/resolve_captcha`` endpoint."""

    # Forwarded unchanged to ``resolve_captcha`` by the endpoint handler.
    image_path: str
10
+
11
+
12
  @app.get("/")
13
  def greet_json():
14
  return {"Hello": "World!"}
 
16
 
17
  @app.get("/greet/{name}")
18
  def greet_name(name: str):
19
+ return {"Hello": name}
20
+
21
+
22
@app.post("/resolve_captcha")
def decode_captcha(item: Item):
    """Decode the captcha image referenced by the request and return its text.

    Args:
        item: Request body; ``item.image_path`` is forwarded unchanged to
            ``resolve_captcha``.

    Returns:
        ``{"captcha_text": <text>}`` on success, or ``{"error": <message>}``
        when ``resolve_captcha`` raises. The response shape is deliberately
        kept as-is so existing clients that look for the ``error`` key keep
        working.
    """
    # Imported locally so the handler is self-contained and does not rely on
    # a module-level logger being defined elsewhere in the file.
    import logging

    # Keep the try body to the single call that can fail.
    try:
        result = resolve_captcha(item.image_path)
    except Exception as e:
        # Record the full traceback server-side; without this the failure was
        # only echoed back to the client and left no trace in the logs.
        logging.getLogger(__name__).exception("Captcha resolution failed")
        return {"error": str(e)}
    return {"captcha_text": result}
32
+
captcha.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
3
+
4
+ from transformers import VisionEncoderDecoderModel, TrOCRProcessor
5
+ from PIL import Image
6
+ import io
7
+ import base64
8
+
9
# The processor and the model are both created from the same pretrained
# checkpoint, once, when this module is imported.
_CHECKPOINT = "anuashok/ocr-captcha-v3"

processor = TrOCRProcessor.from_pretrained(_CHECKPOINT, use_fast=True)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
13
+
14
def resolve_captcha(image_path):
    """Run OCR on a captcha image and return the recognised text.

    Args:
        image_path: Either a string starting with ``data:image`` whose part
            after the first comma is base64-encoded image data, or any value
            ``PIL.Image.open`` accepts (for example a filesystem path).

    Returns:
        The first string decoded from the ids generated by the module-level
        ``model``, with special tokens stripped.

    Raises:
        ValueError: If a data URI carries no payload after the comma, or the
            payload is not valid base64 (``binascii.Error`` is a
            ``ValueError`` subclass).
        OSError: If the image cannot be opened or identified by PIL.
    """
    if isinstance(image_path, str) and image_path.startswith("data:image"):
        # partition() never raises on a missing comma, so a malformed URI can
        # be reported clearly instead of surfacing as an IndexError.
        _, comma, payload = image_path.partition(",")
        if not comma or not payload:
            raise ValueError("Malformed data URI: no base64 payload after ','")
        source = io.BytesIO(base64.b64decode(payload))
    else:
        # NOTE(security): the value is handed to Image.open unchanged. When it
        # originates from an HTTP request this lets a client make the server
        # open arbitrary local files; callers should restrict or validate it.
        source = image_path

    # Open inside a context manager so the underlying file handle is released
    # deterministically; convert() returns an independent in-memory copy.
    with Image.open(source) as raw:
        image = raw.convert("RGBA")

    # Flatten any transparency onto an opaque white background (alpha given
    # explicitly) before handing an RGB image to the processor.
    background = Image.new("RGBA", image.size, (255, 255, 255, 255))
    combined = Image.alpha_composite(background, image).convert("RGB")

    # Prepare the image for the model.
    pixel_values = processor(combined, return_tensors="pt").pixel_values

    # Generate token ids and decode them to text.
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
requirements.txt CHANGED
@@ -1,2 +1,7 @@
1
  fastapi
2
  uvicorn[standard]
 
 
 
 
 
 
1
  fastapi
2
  uvicorn[standard]
3
+ transformers
4
+ pillow
5
+ tensorflow
6
+ torch
7
+ torchvision