Xianbao QIAN
committed on
Commit
·
9e2f4be
1
Parent(s):
071dac0
fetching from all authors, instead of just the first
Browse files
- src/pages/trend/index.tsx +4 -10
- src/utils/modelData.ts +110 -123
src/pages/trend/index.tsx
CHANGED
@@ -79,12 +79,9 @@ const TrendPage: React.FC<TrendProps> = ({ monthlyData = [], totalData = [], det
|
|
79 |
const providerData = Object.fromEntries(
|
80 |
Object.keys(PROVIDERS_MAP).map(provider => {
|
81 |
const providerMonthlyData = monthlyData.filter(d => {
|
82 |
-
const isDataset = d.name?.toLowerCase().includes('dataset') ||
|
83 |
-
d.name?.toLowerCase().includes('corpus') ||
|
84 |
-
d.name?.toLowerCase().includes('data');
|
85 |
const matchesContentType = contentType === 'all' ||
|
86 |
-
(contentType === 'datasets' && isDataset) ||
|
87 |
-
(contentType === 'models' && !isDataset);
|
88 |
return d.provider === provider && matchesContentType;
|
89 |
});
|
90 |
return [provider, providerMonthlyData || []];
|
@@ -94,12 +91,9 @@ const TrendPage: React.FC<TrendProps> = ({ monthlyData = [], totalData = [], det
|
|
94 |
// Filter and group detailed model data
|
95 |
const filteredModels = (detailedData || [])
|
96 |
.filter(model => {
|
97 |
-
const isDataset = model.name.toLowerCase().includes('dataset') ||
|
98 |
-
model.name.toLowerCase().includes('corpus') ||
|
99 |
-
model.name.toLowerCase().includes('data');
|
100 |
const matchesContentType = contentType === 'all' ||
|
101 |
-
(contentType === 'datasets' && isDataset) ||
|
102 |
-
(contentType === 'models' && !isDataset);
|
103 |
|
104 |
return model.likes >= minLikes &&
|
105 |
(selectedProviders.length === 0 || selectedProviders.includes(model.provider)) &&
|
|
|
79 |
const providerData = Object.fromEntries(
|
80 |
Object.keys(PROVIDERS_MAP).map(provider => {
|
81 |
const providerMonthlyData = monthlyData.filter(d => {
|
|
|
|
|
|
|
82 |
const matchesContentType = contentType === 'all' ||
|
83 |
+
(contentType === 'datasets' && d.isDataset) ||
|
84 |
+
(contentType === 'models' && !d.isDataset);
|
85 |
return d.provider === provider && matchesContentType;
|
86 |
});
|
87 |
return [provider, providerMonthlyData || []];
|
|
|
91 |
// Filter and group detailed model data
|
92 |
const filteredModels = (detailedData || [])
|
93 |
.filter(model => {
|
|
|
|
|
|
|
94 |
const matchesContentType = contentType === 'all' ||
|
95 |
+
(contentType === 'datasets' && model.isDataset) ||
|
96 |
+
(contentType === 'models' && !model.isDataset);
|
97 |
|
98 |
return model.likes >= minLikes &&
|
99 |
(selectedProviders.length === 0 || selectedProviders.includes(model.provider)) &&
|
src/utils/modelData.ts
CHANGED
@@ -4,6 +4,7 @@ export interface ModelData {
|
|
4 |
createdAt: string;
|
5 |
id: string;
|
6 |
likes?: number;
|
|
|
7 |
}
|
8 |
|
9 |
export interface Activity {
|
@@ -20,6 +21,8 @@ export interface MonthlyActivity {
|
|
20 |
date: string; // YYYY-MM format
|
21 |
count: number;
|
22 |
provider: string;
|
|
|
|
|
23 |
}
|
24 |
|
25 |
export interface DetailedModelData extends ModelData {
|
@@ -29,6 +32,7 @@ export interface DetailedModelData extends ModelData {
|
|
29 |
monthKey: string; // YYYY-MM
|
30 |
provider: string;
|
31 |
sortKey: string; // YYYY-MM
|
|
|
32 |
}
|
33 |
|
34 |
// Generates calendar data from model data
|
@@ -112,53 +116,33 @@ export const aggregateCalendarData = (calendarData: CalendarData): Activity[] =>
|
|
112 |
};
|
113 |
|
114 |
export const generateMonthlyData = (modelData: ModelData[]): MonthlyActivity[] => {
|
115 |
-
|
116 |
-
console.error('Model data is not an array:', modelData);
|
117 |
-
return [];
|
118 |
-
}
|
119 |
-
|
120 |
-
// Create a map to store counts for each provider and month
|
121 |
-
const monthlyMap: Record<string, Record<string, number>> = {};
|
122 |
|
123 |
-
modelData.forEach(
|
124 |
-
const
|
125 |
-
const
|
126 |
-
|
127 |
-
)?.[0];
|
128 |
|
129 |
-
if (
|
130 |
-
|
131 |
-
if (!monthlyMap[provider]) {
|
132 |
-
monthlyMap[provider] = {};
|
133 |
-
}
|
134 |
-
monthlyMap[provider][date] = (monthlyMap[provider][date] || 0) + 1;
|
135 |
}
|
136 |
-
});
|
137 |
-
|
138 |
-
// Convert the map to an array of monthly activities
|
139 |
-
const monthlyActivities: MonthlyActivity[] = [];
|
140 |
-
|
141 |
-
// Get all unique months across all providers
|
142 |
-
const allMonths = new Set<string>();
|
143 |
-
Object.values(monthlyMap).forEach(providerData => {
|
144 |
-
Object.keys(providerData).forEach(month => allMonths.add(month));
|
145 |
-
});
|
146 |
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
-
|
151 |
-
Object.entries(monthlyMap).forEach(([provider, data]) => {
|
152 |
-
sortedMonths.forEach(month => {
|
153 |
-
monthlyActivities.push({
|
154 |
-
date: month,
|
155 |
-
count: data[month] || 0,
|
156 |
-
provider
|
157 |
-
});
|
158 |
-
});
|
159 |
});
|
160 |
|
161 |
-
return
|
|
|
|
|
162 |
};
|
163 |
|
164 |
const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
|
@@ -182,104 +166,107 @@ async function fetchWithRetry(url: string, retries = 3, delayMs = 1000): Promise
|
|
182 |
throw new Error('Max retries reached');
|
183 |
}
|
184 |
|
185 |
-
export async
|
186 |
-
|
187 |
-
const allModels: ModelData[] = [];
|
188 |
-
const failedProviders: string[] = [];
|
189 |
|
190 |
for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
|
191 |
-
console.log(`Fetching
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
}
|
204 |
-
|
205 |
-
const models = await response.json();
|
206 |
-
console.log(`Received ${models.length} models for ${provider}`);
|
207 |
|
208 |
-
|
209 |
-
|
210 |
-
const date = new Date(model.createdAt);
|
211 |
-
return {
|
212 |
-
id: model.id,
|
213 |
-
name: model.modelId,
|
214 |
-
likes: model.likes || 0,
|
215 |
-
createdAt: model.createdAt,
|
216 |
-
provider,
|
217 |
-
monthKey: date.toLocaleDateString('en-US', {
|
218 |
-
year: 'numeric',
|
219 |
-
month: 'long'
|
220 |
-
}),
|
221 |
-
sortKey: `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`
|
222 |
-
};
|
223 |
-
})
|
224 |
-
);
|
225 |
-
|
226 |
-
// Add a small delay between requests to avoid rate limiting
|
227 |
-
await delay(500);
|
228 |
-
} catch (error) {
|
229 |
-
console.error(`Error fetching data for ${provider}:`, error);
|
230 |
-
failedProviders.push(provider);
|
231 |
}
|
232 |
}
|
233 |
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
if (dateCompare !== 0) return dateCompare;
|
243 |
-
// Then by likes for models in the same month
|
244 |
-
return b.likes - a.likes;
|
245 |
-
});
|
246 |
-
}
|
247 |
|
248 |
export function processDetailedModelData(models: ModelData[]): DetailedModelData[] {
|
249 |
if (!models || models.length === 0) {
|
250 |
-
console.log('No models to process');
|
251 |
return [];
|
252 |
}
|
253 |
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
|
266 |
-
|
267 |
-
|
268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
});
|
270 |
-
|
271 |
-
const processed = Object.entries(groupedModels)
|
272 |
-
.sort((a, b) => {
|
273 |
-
const [monthKeyA, modelsA] = a;
|
274 |
-
const [monthKeyB, modelsB] = b;
|
275 |
-
return modelsB[0].sortKey.localeCompare(modelsA[0].sortKey);
|
276 |
-
})
|
277 |
-
.flatMap(([_, models]) => models);
|
278 |
-
|
279 |
-
console.log('Processed models:', processed.length);
|
280 |
-
console.log('Sample model:', processed[0]);
|
281 |
-
|
282 |
-
return processed;
|
283 |
}
|
284 |
|
285 |
// Helper function to get total monthly data across all providers
|
|
|
4 |
createdAt: string;
|
5 |
id: string;
|
6 |
likes?: number;
|
7 |
+
isDataset?: boolean;
|
8 |
}
|
9 |
|
10 |
export interface Activity {
|
|
|
21 |
date: string; // YYYY-MM format
|
22 |
count: number;
|
23 |
provider: string;
|
24 |
+
isDataset?: boolean;
|
25 |
+
name?: string;
|
26 |
}
|
27 |
|
28 |
export interface DetailedModelData extends ModelData {
|
|
|
32 |
monthKey: string; // Human-readable month label, e.g. "January 2024" (en-US long month + year)
|
33 |
provider: string;
|
34 |
sortKey: string; // YYYY-MM
|
35 |
+
isDataset: boolean;
|
36 |
}
|
37 |
|
38 |
// Generates calendar data from model data
|
|
|
116 |
};
|
117 |
|
118 |
/**
 * Aggregates repositories into monthly activity counts.
 *
 * One entry is produced per (month, provider, content type) combination, so
 * models and datasets are counted separately and the `isDataset` flag on each
 * entry describes every item that was counted into it. Consumers that want a
 * combined figure can sum the `count` of entries sharing `date` and `provider`.
 *
 * The month is derived from `createdAt` using the runtime's local time zone
 * (`getFullYear` / `getMonth`). Items whose `createdAt` cannot be parsed are
 * skipped instead of being collected under a "NaN-NaN" month.
 *
 * @param modelData - Repositories to aggregate; a non-array value is logged
 *   and treated as empty.
 * @returns Activity entries sorted by ascending `date` (YYYY-MM).
 */
export const generateMonthlyData = (modelData: ModelData[]): MonthlyActivity[] => {
  if (!Array.isArray(modelData)) {
    console.error('Model data is not an array:', modelData);
    return [];
  }

  // Keyed by month + provider + content type; a Map keeps first-seen order.
  const buckets = new Map<string, MonthlyActivity>();

  for (const model of modelData) {
    const date = new Date(model.createdAt);
    if (Number.isNaN(date.getTime())) {
      // Unparsable timestamp: there is no meaningful month to count it under.
      continue;
    }

    const monthKey = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`;
    const provider = model.provider || 'unknown';
    // A missing flag is treated as "model", matching the `!d.isDataset` filter.
    const isDataset = model.isDataset === true;
    const bucketKey = JSON.stringify([monthKey, provider, isDataset]);

    let bucket = buckets.get(bucketKey);
    if (!bucket) {
      bucket = {
        date: monthKey,
        count: 0,
        provider,
        isDataset,
        // Name of the first item counted into this bucket.
        name: model.name
      };
      buckets.set(bucketKey, bucket);
    }

    bucket.count++;
  }

  return Array.from(buckets.values()).sort((a, b) => a.date.localeCompare(b.date));
};
|
147 |
|
148 |
/**
 * Resolves (with no value) after roughly `ms` milliseconds.
 * Typed as `Promise<void>` so awaiting it never yields an `unknown` value.
 */
const delay = (ms: number): Promise<void> =>
  new Promise<void>(resolve => setTimeout(resolve, ms));
|
|
|
166 |
throw new Error('Max retries reached');
|
167 |
}
|
168 |
|
169 |
+
/** Repository categories requested from the Hugging Face Hub listing API. */
type HubRepoKind = 'models' | 'datasets';

/**
 * Fetches every repository of one kind for a single author and tags each
 * record with the owning provider.
 *
 * @throws When the request fails (after `fetchWithRetry` gives up) or when
 *   the response body is not a JSON array.
 */
const fetchAuthorRepos = async (
  kind: HubRepoKind,
  author: string,
  provider: string
): Promise<ModelData[]> => {
  // Encode the author so characters that are special in a query string cannot corrupt the URL.
  const url = `https://huggingface.co/api/${kind}?author=${encodeURIComponent(author)}&sort=likes&direction=-1&limit=10000`;
  const response = await fetchWithRetry(url, 3, 2000);
  const payload: unknown = await response.json();

  if (!Array.isArray(payload)) {
    throw new Error(`Unexpected ${kind} response for ${author}: expected a JSON array`);
  }

  const isDataset = kind === 'datasets';
  const records = payload.map(item => ({
    id: item.id,
    name: item.id,
    createdAt: item.createdAt,
    likes: item.likes,
    downloads: item.downloads,
    isDataset,
    provider
  }));
  return records;
};

/**
 * Fetches models and datasets for every author of every provider in
 * `PROVIDERS_MAP`.
 *
 * Models and datasets are requested independently, so a failure of one
 * request only loses that request's results; the error is logged and the
 * remaining authors are still processed. Results are de-duplicated per content
 * type and id, because a model and a dataset may share the same `org/name` id.
 *
 * @returns All unique repositories that could be fetched.
 */
export const fetchAllModelData = async (): Promise<ModelData[]> => {
  const allData: ModelData[] = [];
  const kinds: HubRepoKind[] = ['models', 'datasets'];

  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
    console.log(`Fetching data for provider: ${provider}`);

    for (const author of info.authors) {
      console.log(` Fetching data for author: ${author}`);

      const counts: Record<HubRepoKind, number> = { models: 0, datasets: 0 };
      for (const kind of kinds) {
        try {
          const repos = await fetchAuthorRepos(kind, author, provider);
          counts[kind] = repos.length;
          allData.push(...repos);
        } catch (error) {
          console.error(`Error fetching data for ${provider}/${author} (${kind}):`, error);
        }
      }

      console.log(` Fetched ${counts.models + counts.datasets} items (${counts.models} models, ${counts.datasets} datasets) for ${author}`);

      // Add a delay between author requests to avoid rate limiting
      await delay(1000);
    }
  }

  // Remove duplicates; the key includes the content type so a dataset cannot
  // replace a model that happens to have the same id.
  const uniqueData = Array.from(
    new Map(
      allData.map(item => [`${item.isDataset ? 'dataset' : 'model'}:${item.id}`, item] as const)
    ).values()
  );

  console.log(`Total unique items fetched: ${uniqueData.length}`);
  return uniqueData;
};
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
/**
 * Enriches raw repository records with display and sorting fields.
 *
 * For each item this adds:
 * - `monthKey`: en-US "Month YYYY" label built from `createdAt`
 * - `sortKey`: "YYYY-MM" built from `createdAt` (runtime's local time zone)
 * - `provider`: the provider already attached to the item, otherwise the first
 *   `PROVIDERS_MAP` entry whose `authors` contains the id's organisation
 *   prefix, otherwise `'unknown'`
 * - defaults for `name` (falls back to `id`), `likes`, `downloads` (0) and
 *   `isDataset` (false)
 *
 * @param models - Raw records; `null`/`undefined`/empty input yields `[]`.
 * @returns A new array sorted by month (newest first), then by likes
 *   (highest first) within the same month.
 */
export function processDetailedModelData(models: ModelData[]): DetailedModelData[] {
  if (!models || models.length === 0) {
    return [];
  }

  // Build the author -> provider lookup once instead of scanning PROVIDERS_MAP
  // for every item. The first provider listing an author wins.
  const providerByAuthor = new Map<string, string>();
  for (const [providerName, info] of Object.entries(PROVIDERS_MAP)) {
    for (const author of info.authors) {
      if (!providerByAuthor.has(author)) {
        providerByAuthor.set(author, providerName);
      }
    }
  }

  return models
    .map(model => {
      const date = new Date(model.createdAt);
      const monthKey = date.toLocaleDateString('en-US', {
        year: 'numeric',
        month: 'long'
      });
      const sortKey = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`;
      const org = model.id.split('/')[0] ?? '';
      // Prefer the provider recorded on the item (consistent with
      // generateMonthlyData); fall back to the author lookup.
      const provider = model.provider || providerByAuthor.get(org) || 'unknown';

      return {
        ...model,
        name: model.name || model.id,
        // `||` (not `??`) so NaN also falls back to 0 and cannot poison the sort below.
        likes: model.likes || 0,
        downloads: model.downloads || 0,
        monthKey,
        provider,
        sortKey,
        isDataset: model.isDataset || false
      };
    })
    .sort((a, b) => {
      // First sort by sortKey (year-month) in descending order
      const dateCompare = b.sortKey.localeCompare(a.sortKey);
      if (dateCompare !== 0) return dateCompare;
      // Then by likes for items in the same month
      return (b.likes || 0) - (a.likes || 0);
    });
}
|
271 |
|
272 |
// Helper function to get total monthly data across all providers
|