aliosm committed on
Commit
04a7bcd
·
1 Parent(s): 98d6155

Initial space app

Browse files
Files changed (5) hide show
  1. .gitattributes +2 -0
  2. README.md +4 -4
  3. app.py +165 -0
  4. index.tsv +3 -0
  5. requirements.txt +4 -0
.gitattributes CHANGED
@@ -1,3 +1,4 @@
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
1
+ index.tsv filter=lfs diff=lfs merge=lfs -text
2
  *.7z filter=lfs diff=lfs merge=lfs -text
3
  *.arrow filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
 
34
  *.zip filter=lfs diff=lfs merge=lfs -text
35
  *.zst filter=lfs diff=lfs merge=lfs -text
36
  *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ index.tsv filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
  title: Waqfeya
3
- emoji: 🌍
4
- colorFrom: yellow
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.26.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
1
  ---
2
  title: Waqfeya
3
+ emoji: 📚
4
+ colorFrom: pink
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 5.9.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import json
3
+ import urllib.parse
4
+
5
+ from pathlib import Path
6
+
7
+ import gradio as gr
8
+
9
+ from fuzzywuzzy import fuzz
10
+ from pyarabic.araby import strip_tashkeel
11
+
12
+
13
def main():
    """Build the Gradio search UI for the library index and launch it.

    The page has a title box, optional category/author dropdowns, a results
    table, and a tabbed area with download links for the selected book.
    The index is loaded by the ``demo.load`` event and kept in a ``gr.State``.
    """
    with gr.Blocks(
        theme=gr.themes.Default(font=[gr.themes.GoogleFont('Noto Sans Arabic'), 'Arial', 'sans-serif']),
        css='\n'.join([
            'html, body, .gradio-container { direction: rtl !important; }',
            'h1 { text-align: center; display: block; }',
            'th, td { text-align: right !important; }',
            'th span { white-space: nowrap !important; }',
            '.icon-wrap { right: unset !important; left: var(--size-3) !important; }',
        ])
    ) as demo:
        # Server-side state: the loaded index and the rows of the last search.
        index_state = gr.State()
        results_data = gr.State()

        gr.Markdown('# ابحث في كتب المكتبة الوقفية 📚', rtl=True)

        title = gr.Textbox(label='عنوان الكتاب', placeholder='اكتب عنوان الكتاب', lines=1, rtl=True)

        with gr.Row():
            # Dropdowns start disabled with a "loading" entry; load_data() fills them.
            category = gr.Dropdown(choices=['جارٍ التحميل...'], label='التصنيف (اختياري)', interactive=False)
            author = gr.Dropdown(choices=['جارٍ التحميل...'], label='المؤلف (اختياري)', interactive=False)

        search_button = gr.Button('ابحث')

        gr.Markdown('## النتائج 🎯', rtl=True)

        results = gr.Dataframe(headers=['#', 'العنوان', 'المؤلف', 'التصنيف', 'درجة التطابق'], interactive=False)

        download_label = gr.Markdown('### تحميل ملفات الكتاب 📥', visible=False, rtl=True)
        with gr.Tabs(visible=False) as details_box:
            with gr.Tab('PDF'):
                pdf_tab = gr.Markdown(rtl=True)
            with gr.Tab('TXT'):
                txt_tab = gr.Markdown(rtl=True)
            with gr.Tab('DOCX'):
                docx_tab = gr.Markdown(rtl=True)

        def load_data():
            """Load the index and enable both dropdowns with their choices."""
            _index = load_index()
            _categories = get_categories(_index)
            _authors = get_authors(_index)

            return (
                _index,
                gr.update(choices=_categories, value=_categories[0], interactive=True),
                gr.update(choices=_authors, value=_authors[0], interactive=True),
            )

        def run_search(t, c, a, idx):
            """Run handle_search and also hide the download heading.

            handle_search only hides the tabs; without the extra update the
            heading from a previous selection would stay visible.
            """
            return [*handle_search(idx, t, c, a), gr.update(visible=False)]

        def show_details(evt: gr.SelectData, index_state, results_data):
            """Show download links for the book in the clicked results row."""
            # Leaves the tab contents untouched and hides the details area.
            keep_hidden = [
                gr.update(),
                gr.update(),
                gr.update(),
                gr.update(visible=False),
                gr.update(visible=False),
            ]

            if not index_state or not results_data:
                return keep_hidden

            # Placeholder rows ("enter a title", "no results") carry '' in the
            # first column instead of a book number; there is nothing to show.
            book_number = results_data[evt.index[0]][0]
            if not isinstance(book_number, int):
                return keep_hidden

            # Book numbers are 1-based positions in the index.
            book_details = index_state[book_number - 1]

            pdf_paths = generate_download_url(book_details[-4])
            txt_paths = generate_download_url(book_details[-3])
            docx_paths = generate_download_url(book_details[-2])

            return [
                gr.update(visible=True, value=pdf_paths),
                gr.update(visible=True, value=txt_paths),
                gr.update(visible=True, value=docx_paths),
                gr.update(visible=True),
                gr.update(visible=True),
            ]

        search_button.click(
            fn=run_search,
            inputs=[title, category, author, index_state],
            outputs=[results, results_data, details_box, download_label],
        )

        results.select(
            fn=show_details,
            inputs=[index_state, results_data],
            outputs=[pdf_tab, txt_tab, docx_tab, details_box, download_label],
        )

        demo.load(load_data, outputs=[index_state, category, author])

    demo.launch()
91
+
92
+
93
def load_index():
    """Read ``index.tsv`` and return its data rows as lists.

    The first row is treated as a header and dropped. Each returned row is
    ``[number, *columns, normalized]`` where ``number`` is the row's 1-based
    position in the returned list and ``normalized`` is ``normalize_text``
    applied to the row's third column.

    Returns:
        list[list]: one list per non-blank data row.
    """
    # newline='' lets the csv module do its own newline handling, as the
    # csv documentation requires for files passed to csv.reader.
    with open('index.tsv', 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file, delimiter='\t')
        next(reader, None)  # skip the header row (no-op on an empty file)
        # csv.reader yields [] for blank lines; skip them so row[2] is safe.
        rows = [row for row in reader if row]

    return [
        [number, *row, normalize_text(row[2])]
        for number, row in enumerate(rows, start=1)
    ]
101
+
102
+
103
def get_categories(index):
    """Return dropdown choices built from column 1 of the index rows.

    The list starts with an empty string (no filter), followed by the
    distinct non-empty values in sorted order.
    """
    distinct = {record[1] for record in index if record[1]}
    choices = ['']
    choices.extend(sorted(distinct))
    return choices
105
+
106
+
107
def get_authors(index):
    """Return dropdown choices built from column 2 of the index rows.

    The list starts with an empty string (no filter), followed by the
    distinct non-empty values in sorted order.
    """
    distinct = {record[2] for record in index if record[2]}
    choices = ['']
    choices.extend(sorted(distinct))
    return choices
109
+
110
+
111
def handle_search(index, title: str, category: str, author: str, *, min_score=50, max_results=100):
    """Fuzzy-search the index by title, optionally filtered by category/author.

    Args:
        index: rows as produced by ``load_index``; ``None`` (index not loaded
            yet) is treated as an empty index.
        title: the user's query; it is normalized with ``normalize_text`` and
            stripped of surrounding whitespace before matching.
        category: exact value to match against column 1, or empty for no filter.
        author: exact value to match against column 2, or empty for no filter.
        min_score: a row is kept only if its score is strictly greater than this.
        max_results: maximum number of rows returned.

    Returns:
        A three-item list: the rows for the results table, the same rows for
        the results state, and an update that hides the details area. When
        there is no query or no match, the rows are a single message row.
    """
    # Strip so that stray leading/trailing spaces do not take part in scoring.
    query = normalize_text(title or '').strip()

    if not query:
        message = [['', 'يرجى إدخال عنوان للبحث.', '', '', '']]
        return [message, message, gr.update(visible=False)]

    candidates = index or []

    if category:
        candidates = [row for row in candidates if row[1] == category]

    if author:
        candidates = [row for row in candidates if row[2] == author]

    scored_results = []
    for row in candidates:
        # row[-1] is the normalized text appended by load_index.
        score = fuzz.partial_ratio(query, row[-1])

        if score > min_score:
            scored_results.append((score, row))

    if not scored_results:
        message = [['', 'لم يتم العثور على نتائج مطابقة.', '', '', '']]
        return [message, message, gr.update(visible=False)]

    # Best score first; ties broken by row number (descending), which is the
    # order the plain tuple sort produced, without comparing entire rows.
    scored_results.sort(key=lambda item: (item[0], item[1][0]), reverse=True)

    result_table = [
        [row[0], row[3], row[2], row[1], score]
        for score, row in scored_results[:max_results]
    ]

    return [result_table, result_table, gr.update(visible=False)]
140
+
141
+
142
def generate_download_url(paths):
    """Turn a stringified list of file paths into a Markdown list of links.

    Args:
        paths: text of a list literal such as ``"['./pdf/a.pdf']"``. An empty
            or blank string yields an empty result.

    Returns:
        str: one ``- [file name](url)`` line per path, joined by newlines.

    Raises:
        ValueError, SyntaxError: if ``paths`` is not a valid literal.
    """
    import ast  # local import keeps this block self-contained

    if not paths or not paths.strip():
        return ''

    # Parse the list literal directly. Swapping ' for " and parsing as JSON
    # breaks as soon as a file name itself contains a quote character.
    parsed = ast.literal_eval(paths)
    if isinstance(parsed, str):
        parsed = [parsed]

    base_url = 'https://huggingface.co/datasets/ieasybooks-org/waqfeya-library/resolve/main/'

    formatted_paths = []
    for path in parsed:
        # path[2:] drops the first two characters (presumably a leading './'
        # in the stored paths -- confirm against the index data).
        download_url = f'{base_url}{urllib.parse.quote(path[2:])}?download=true'
        formatted_paths.append(f'- [{Path(path).name}]({download_url})')

    return '\n'.join(formatted_paths)
151
+
152
+
153
# Single-character substitutions applied after tashkeel removal.
_LETTER_FOLDS = str.maketrans({
    'أ': 'ا',
    'إ': 'ا',
    'آ': 'ا',
    'ي': 'ى',
    'ة': 'ه',
})


def normalize_text(text):
    """Return ``text`` in the canonical form used for matching.

    The text is passed through pyarabic's ``strip_tashkeel`` and then the
    letter substitutions in ``_LETTER_FOLDS`` are applied in one pass.
    """
    return strip_tashkeel(text).translate(_LETTER_FOLDS)
162
+
163
+
164
# Start the app only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()
index.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c8c53cf6fbe2340fb6ebc3194c09c7b485d06adc627cc9d28a0c39e2c7a0df0
3
+ size 16784007
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ PyArabic==0.6.15
2
+ fuzzywuzzy==0.18.0
3
+ gradio==5.9.1
4
+ python-Levenshtein==0.27.1