Quintino Fernandes committed on
Commit
b9c04aa
·
1 Parent(s): 4a24dbd

1st sum model

Browse files
Files changed (2) hide show
  1. LexRank.py +121 -0
  2. requirements.txt +11 -1
LexRank.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LexRank implementation
3
+ Source: https://github.com/crabcamp/lexrank/tree/dev
4
+ """
5
+
6
+ import logging
7
+
8
+ import numpy as np
9
+ from scipy.sparse.csgraph import connected_components
10
+ from scipy.special import softmax
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def degree_centrality_scores(
    similarity_matrix,
    threshold=None,
    increase_power=True,
):
    """Score every node of a similarity graph by its stationary probability.

    The similarity matrix is first turned into a row-stochastic Markov
    matrix -- continuous when ``threshold`` is ``None``, thresholded to 0/1
    weights otherwise -- and the un-normalised stationary distribution of
    that matrix is returned.

    Args:
        similarity_matrix: Square matrix of pairwise similarities.
        threshold: ``None`` for the continuous variant, or a ``float`` in
            ``[0, 1)``; similarities greater than or equal to it count as
            edges.
        increase_power: Forwarded to ``stationary_distribution``.

    Returns:
        The array produced by ``stationary_distribution`` with
        ``normalized=False``.

    Raises:
        ValueError: If ``threshold`` is neither ``None`` nor a ``float``
            inside ``[0, 1)``.
    """
    if threshold is None:
        markov_matrix = create_markov_matrix(similarity_matrix)
    elif isinstance(threshold, float) and 0 <= threshold < 1:
        markov_matrix = create_markov_matrix_discrete(similarity_matrix, threshold)
    else:
        raise ValueError(
            "'threshold' should be a floating-point number from the interval [0, 1) or None",
        )

    return stationary_distribution(
        markov_matrix,
        increase_power=increase_power,
        normalized=False,
    )
41
+
42
+
43
def _power_method(transition_matrix, increase_power=True, max_iter=10000):
    """Approximate the dominant left eigenvector by power iteration.

    Starting from an all-ones vector, the vector is repeatedly multiplied by
    the transposed transition matrix until two successive iterates agree
    under ``np.allclose`` or ``max_iter`` iterations have been performed.

    Args:
        transition_matrix: 2-D array, expected to be square. A matrix with a
            single row is answered immediately with ``[1.0]``.
        increase_power: When true, the working matrix is squared after every
            step that has not converged, so later steps advance the chain by
            1, 2, 4, ... transitions.
        max_iter: Upper bound on the number of iterations.

    Returns:
        A 1-D array with one entry per row of ``transition_matrix``. When the
        iteration budget runs out, a warning is logged and the most recent
        iterate is returned.
    """
    eigenvector = np.ones(len(transition_matrix))

    if len(eigenvector) == 1:
        return eigenvector

    transition = transition_matrix.transpose()

    for _ in range(max_iter):
        eigenvector_next = np.dot(transition, eigenvector)

        if np.allclose(eigenvector_next, eigenvector):
            return eigenvector_next

        eigenvector = eigenvector_next

        if increase_power:
            transition = np.dot(transition, transition)

    logger.warning("Maximum number of iterations for power method exceeded without convergence!")
    # `eigenvector` always holds the latest iterate, and unlike
    # `eigenvector_next` it is bound even when max_iter is 0 and the loop
    # body never runs.
    return eigenvector
64
+
65
+
66
def connected_nodes(matrix):
    """Group node indices by the connected component they belong to.

    Args:
        matrix: Adjacency-like matrix handed to
            ``scipy.sparse.csgraph.connected_components``.

    Returns:
        A list with one index array per component, ordered by component
        label.
    """
    _, component_of = connected_components(matrix)
    return [np.flatnonzero(component_of == label) for label in np.unique(component_of)]
76
+
77
+
78
def create_markov_matrix(weights_matrix):
    """Turn a square weight matrix into a row-stochastic matrix.

    Args:
        weights_matrix: 2-D array of edge weights.

    Returns:
        An array of the same shape whose rows each sum to one. If the
        smallest weight is zero or negative the rows are normalised with a
        softmax; otherwise every row is divided by its sum.

    Raises:
        ValueError: If ``weights_matrix`` is not square.
    """
    height, width = weights_matrix.shape
    if height != width:
        raise ValueError("'weights_matrix' should be square")

    # Plain row normalisation is only used for strictly positive weights;
    # any zero or negative entry switches to a softmax over each row.
    if np.min(weights_matrix) <= 0:
        return softmax(weights_matrix, axis=1)

    row_totals = weights_matrix.sum(axis=1, keepdims=True)
    return weights_matrix / row_totals
90
+
91
+
92
def create_markov_matrix_discrete(weights_matrix, threshold):
    """Binarise a weight matrix at ``threshold`` and make it row-stochastic.

    Args:
        weights_matrix: 2-D array of edge weights.
        threshold: Weights greater than or equal to this value become 1,
            all others become 0.

    Returns:
        The result of ``create_markov_matrix`` applied to the 0/1 matrix.
    """
    binary_weights = np.zeros(weights_matrix.shape)
    binary_weights[weights_matrix >= threshold] = 1
    return create_markov_matrix(binary_weights)
98
+
99
+
100
def stationary_distribution(
    transition_matrix,
    increase_power=True,
    normalized=True,
):
    """Compute a stationary vector for each connected component of a chain.

    The matrix is split into its connected components, the power method is
    run on every component's sub-matrix, and the per-component results are
    written back into one vector indexed like the original nodes.

    Args:
        transition_matrix: Square 2-D transition matrix.
        increase_power: Forwarded to ``_power_method``.
        normalized: When true, the assembled vector is divided by the number
            of nodes before being returned.

    Returns:
        A 1-D array with one score per node.

    Raises:
        ValueError: If ``transition_matrix`` is not square.
    """
    node_count, column_count = transition_matrix.shape
    if node_count != column_count:
        raise ValueError("'transition_matrix' should be square")

    distribution = np.zeros(node_count)

    for component in connected_nodes(transition_matrix):
        # Restrict the chain to the rows and columns of this component only.
        sub_matrix = transition_matrix[np.ix_(component, component)]
        distribution[component] = _power_method(sub_matrix, increase_power=increase_power)

    return distribution / node_count if normalized else distribution
requirements.txt CHANGED
@@ -1,2 +1,12 @@
1
  fastapi
2
- uvicorn[standard]
 
 
 
 
 
 
 
 
 
 
 
1
  fastapi
2
+ uvicorn[standard]
3
+ transformers
4
+ torch
5
+ sentence_transformers
6
+ nltk
7
+ spacy
8
+ numpy
9
+ pandas
10
+ scipy
11
+ logging
12
+ psycopg2