Xianbao QIAN committed on
Commit
9e2f4be
·
1 Parent(s): 071dac0

fetching from all authors, instead of just the first one

Browse files
Files changed (2) hide show
  1. src/pages/trend/index.tsx +4 -10
  2. src/utils/modelData.ts +110 -123
src/pages/trend/index.tsx CHANGED
@@ -79,12 +79,9 @@ const TrendPage: React.FC<TrendProps> = ({ monthlyData = [], totalData = [], det
79
  const providerData = Object.fromEntries(
80
  Object.keys(PROVIDERS_MAP).map(provider => {
81
  const providerMonthlyData = monthlyData.filter(d => {
82
- const isDataset = d.name?.toLowerCase().includes('dataset') ||
83
- d.name?.toLowerCase().includes('corpus') ||
84
- d.name?.toLowerCase().includes('data');
85
  const matchesContentType = contentType === 'all' ||
86
- (contentType === 'datasets' && isDataset) ||
87
- (contentType === 'models' && !isDataset);
88
  return d.provider === provider && matchesContentType;
89
  });
90
  return [provider, providerMonthlyData || []];
@@ -94,12 +91,9 @@ const TrendPage: React.FC<TrendProps> = ({ monthlyData = [], totalData = [], det
94
  // Filter and group detailed model data
95
  const filteredModels = (detailedData || [])
96
  .filter(model => {
97
- const isDataset = model.name.toLowerCase().includes('dataset') ||
98
- model.name.toLowerCase().includes('corpus') ||
99
- model.name.toLowerCase().includes('data');
100
  const matchesContentType = contentType === 'all' ||
101
- (contentType === 'datasets' && isDataset) ||
102
- (contentType === 'models' && !isDataset);
103
 
104
  return model.likes >= minLikes &&
105
  (selectedProviders.length === 0 || selectedProviders.includes(model.provider)) &&
 
79
  const providerData = Object.fromEntries(
80
  Object.keys(PROVIDERS_MAP).map(provider => {
81
  const providerMonthlyData = monthlyData.filter(d => {
 
 
 
82
  const matchesContentType = contentType === 'all' ||
83
+ (contentType === 'datasets' && d.isDataset) ||
84
+ (contentType === 'models' && !d.isDataset);
85
  return d.provider === provider && matchesContentType;
86
  });
87
  return [provider, providerMonthlyData || []];
 
91
  // Filter and group detailed model data
92
  const filteredModels = (detailedData || [])
93
  .filter(model => {
 
 
 
94
  const matchesContentType = contentType === 'all' ||
95
+ (contentType === 'datasets' && model.isDataset) ||
96
+ (contentType === 'models' && !model.isDataset);
97
 
98
  return model.likes >= minLikes &&
99
  (selectedProviders.length === 0 || selectedProviders.includes(model.provider)) &&
src/utils/modelData.ts CHANGED
@@ -4,6 +4,7 @@ export interface ModelData {
4
  createdAt: string;
5
  id: string;
6
  likes?: number;
 
7
  }
8
 
9
  export interface Activity {
@@ -20,6 +21,8 @@ export interface MonthlyActivity {
20
  date: string; // YYYY-MM format
21
  count: number;
22
  provider: string;
 
 
23
  }
24
 
25
  export interface DetailedModelData extends ModelData {
@@ -29,6 +32,7 @@ export interface DetailedModelData extends ModelData {
29
  monthKey: string; // YYYY-MM
30
  provider: string;
31
  sortKey: string; // YYYY-MM
 
32
  }
33
 
34
  // Generates calendar data from model data
@@ -112,53 +116,33 @@ export const aggregateCalendarData = (calendarData: CalendarData): Activity[] =>
112
  };
113
 
114
  export const generateMonthlyData = (modelData: ModelData[]): MonthlyActivity[] => {
115
- if (!Array.isArray(modelData)) {
116
- console.error('Model data is not an array:', modelData);
117
- return [];
118
- }
119
-
120
- // Create a map to store counts for each provider and month
121
- const monthlyMap: Record<string, Record<string, number>> = {};
122
 
123
- modelData.forEach(item => {
124
- const [org] = item.id.split('/');
125
- const provider = Object.entries(PROVIDERS_MAP).find(([_, info]) =>
126
- info.authors.includes(org)
127
- )?.[0];
128
 
129
- if (provider) {
130
- const date = item.createdAt.substring(0, 7); // Get YYYY-MM
131
- if (!monthlyMap[provider]) {
132
- monthlyMap[provider] = {};
133
- }
134
- monthlyMap[provider][date] = (monthlyMap[provider][date] || 0) + 1;
135
  }
136
- });
137
-
138
- // Convert the map to an array of monthly activities
139
- const monthlyActivities: MonthlyActivity[] = [];
140
-
141
- // Get all unique months across all providers
142
- const allMonths = new Set<string>();
143
- Object.values(monthlyMap).forEach(providerData => {
144
- Object.keys(providerData).forEach(month => allMonths.add(month));
145
- });
146
 
147
- // Sort months chronologically
148
- const sortedMonths = Array.from(allMonths).sort();
 
 
 
 
 
 
 
149
 
150
- // Create entries for each provider and month
151
- Object.entries(monthlyMap).forEach(([provider, data]) => {
152
- sortedMonths.forEach(month => {
153
- monthlyActivities.push({
154
- date: month,
155
- count: data[month] || 0,
156
- provider
157
- });
158
- });
159
  });
160
 
161
- return monthlyActivities;
 
 
162
  };
163
 
164
  const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
@@ -182,104 +166,107 @@ async function fetchWithRetry(url: string, retries = 3, delayMs = 1000): Promise
182
  throw new Error('Max retries reached');
183
  }
184
 
185
- export async function fetchAllModelData(): Promise<ModelData[]> {
186
- console.log('Starting to fetch model data...');
187
- const allModels: ModelData[] = [];
188
- const failedProviders: string[] = [];
189
 
190
  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
191
- console.log(`Fetching models for provider: ${provider}`);
192
- try {
193
- const response = await fetchWithRetry(
194
- `https://huggingface.co/api/models?author=${info.authors[0]}&sort=likes&direction=-1&limit=100`,
195
- 3,
196
- 2000 // 2 second delay between retries
197
- );
198
-
199
- if (!response.ok) {
200
- console.error(`Failed to fetch data for ${provider}:`, response.status, response.statusText);
201
- failedProviders.push(provider);
202
- continue;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  }
204
-
205
- const models = await response.json();
206
- console.log(`Received ${models.length} models for ${provider}`);
207
 
208
- allModels.push(
209
- ...models.map((model: any) => {
210
- const date = new Date(model.createdAt);
211
- return {
212
- id: model.id,
213
- name: model.modelId,
214
- likes: model.likes || 0,
215
- createdAt: model.createdAt,
216
- provider,
217
- monthKey: date.toLocaleDateString('en-US', {
218
- year: 'numeric',
219
- month: 'long'
220
- }),
221
- sortKey: `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`
222
- };
223
- })
224
- );
225
-
226
- // Add a small delay between requests to avoid rate limiting
227
- await delay(500);
228
- } catch (error) {
229
- console.error(`Error fetching data for ${provider}:`, error);
230
- failedProviders.push(provider);
231
  }
232
  }
233
 
234
- if (failedProviders.length > 0) {
235
- console.warn('Failed to fetch data for providers:', failedProviders);
236
- }
237
-
238
- console.log(`Total models fetched: ${allModels.length}`);
239
- return allModels.sort((a, b) => {
240
- // First sort by sortKey (year-month) in descending order
241
- const dateCompare = b.sortKey.localeCompare(a.sortKey);
242
- if (dateCompare !== 0) return dateCompare;
243
- // Then by likes for models in the same month
244
- return b.likes - a.likes;
245
- });
246
- }
247
 
248
  export function processDetailedModelData(models: ModelData[]): DetailedModelData[] {
249
  if (!models || models.length === 0) {
250
- console.log('No models to process');
251
  return [];
252
  }
253
 
254
- console.log('Processing detailed model data...');
255
- console.log('Input models:', models.length);
256
-
257
- // Group models by month
258
- const groupedModels = models.reduce<Record<string, DetailedModelData[]>>((acc, model) => {
259
- if (!acc[model.monthKey]) {
260
- acc[model.monthKey] = [];
261
- }
262
- acc[model.monthKey].push(model as DetailedModelData);
263
- return acc;
264
- }, {});
265
 
266
- // Sort each month's models by likes
267
- Object.values(groupedModels).forEach(monthModels => {
268
- monthModels.sort((a, b) => b.likes - a.likes);
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  });
270
-
271
- const processed = Object.entries(groupedModels)
272
- .sort((a, b) => {
273
- const [monthKeyA, modelsA] = a;
274
- const [monthKeyB, modelsB] = b;
275
- return modelsB[0].sortKey.localeCompare(modelsA[0].sortKey);
276
- })
277
- .flatMap(([_, models]) => models);
278
-
279
- console.log('Processed models:', processed.length);
280
- console.log('Sample model:', processed[0]);
281
-
282
- return processed;
283
  }
284
 
285
  // Helper function to get total monthly data across all providers
 
4
  createdAt: string;
5
  id: string;
6
  likes?: number;
7
+ isDataset?: boolean;
8
  }
9
 
10
  export interface Activity {
 
21
  date: string; // YYYY-MM format
22
  count: number;
23
  provider: string;
24
+ isDataset?: boolean;
25
+ name?: string;
26
  }
27
 
28
  export interface DetailedModelData extends ModelData {
 
32
  monthKey: string; // YYYY-MM
33
  provider: string;
34
  sortKey: string; // YYYY-MM
35
+ isDataset: boolean;
36
  }
37
 
38
  // Generates calendar data from model data
 
116
  };
117
 
118
  export const generateMonthlyData = (modelData: ModelData[]): MonthlyActivity[] => {
119
+ const monthlyData: Record<string, Record<string, MonthlyActivity>> = {};
 
 
 
 
 
 
120
 
121
+ modelData.forEach(model => {
122
+ const date = new Date(model.createdAt);
123
+ const monthKey = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`;
124
+ const provider = model.provider || 'unknown';
 
125
 
126
+ if (!monthlyData[monthKey]) {
127
+ monthlyData[monthKey] = {};
 
 
 
 
128
  }
 
 
 
 
 
 
 
 
 
 
129
 
130
+ if (!monthlyData[monthKey][provider]) {
131
+ monthlyData[monthKey][provider] = {
132
+ date: monthKey,
133
+ count: 0,
134
+ provider,
135
+ isDataset: model.isDataset,
136
+ name: model.name
137
+ };
138
+ }
139
 
140
+ monthlyData[monthKey][provider].count++;
 
 
 
 
 
 
 
 
141
  });
142
 
143
+ return Object.values(monthlyData)
144
+ .flatMap(providerData => Object.values(providerData))
145
+ .sort((a, b) => a.date.localeCompare(b.date));
146
  };
147
 
148
  const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
 
166
  throw new Error('Max retries reached');
167
  }
168
 
169
+ export const fetchAllModelData = async (): Promise<ModelData[]> => {
170
+ const allData: ModelData[] = [];
 
 
171
 
172
  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
173
+ console.log(`Fetching data for provider: ${provider}`);
174
+
175
+ for (const author of info.authors) {
176
+ console.log(` Fetching data for author: ${author}`);
177
+ try {
178
+ // Fetch models
179
+ const modelResponse = await fetchWithRetry(
180
+ `https://huggingface.co/api/models?author=${author}&sort=likes&direction=-1&limit=10000`,
181
+ 3,
182
+ 2000
183
+ );
184
+ const modelData = await modelResponse.json();
185
+
186
+ // Fetch datasets
187
+ const datasetResponse = await fetchWithRetry(
188
+ `https://huggingface.co/api/datasets?author=${author}&sort=likes&direction=-1&limit=10000`,
189
+ 3,
190
+ 2000
191
+ );
192
+ const datasetData = await datasetResponse.json();
193
+
194
+ // Combine and process the data
195
+ const combinedData = [
196
+ ...modelData.map((item: any) => ({
197
+ id: item.id,
198
+ name: item.id,
199
+ createdAt: item.createdAt,
200
+ likes: item.likes,
201
+ downloads: item.downloads,
202
+ isDataset: false,
203
+ provider
204
+ })),
205
+ ...datasetData.map((item: any) => ({
206
+ id: item.id,
207
+ name: item.id,
208
+ createdAt: item.createdAt,
209
+ likes: item.likes,
210
+ downloads: item.downloads,
211
+ isDataset: true,
212
+ provider
213
+ }))
214
+ ];
215
+
216
+ allData.push(...combinedData);
217
+ console.log(` Fetched ${combinedData.length} items (${modelData.length} models, ${datasetData.length} datasets) for ${author}`);
218
+ } catch (error) {
219
+ console.error(`Error fetching data for ${provider}/${author}:`, error);
220
  }
 
 
 
221
 
222
+ // Add a delay between author requests to avoid rate limiting
223
+ await delay(1000);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  }
225
  }
226
 
227
+ // Remove duplicates based on id
228
+ const uniqueData = Array.from(
229
+ new Map(allData.map(item => [item.id, item])).values()
230
+ );
231
+
232
+ console.log(`Total unique items fetched: ${uniqueData.length}`);
233
+ return uniqueData;
234
+ };
 
 
 
 
 
235
 
236
  export function processDetailedModelData(models: ModelData[]): DetailedModelData[] {
237
  if (!models || models.length === 0) {
 
238
  return [];
239
  }
240
 
241
+ return models.map(model => {
242
+ const date = new Date(model.createdAt);
243
+ const monthKey = date.toLocaleDateString('en-US', {
244
+ year: 'numeric',
245
+ month: 'long'
246
+ });
247
+ const sortKey = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`;
248
+ const [org] = model.id.split('/');
249
+ const provider = Object.entries(PROVIDERS_MAP).find(([_, info]) =>
250
+ info.authors.includes(org)
251
+ )?.[0] || 'unknown';
252
 
253
+ return {
254
+ ...model,
255
+ name: model.name || model.id,
256
+ likes: model.likes || 0,
257
+ downloads: model.downloads || 0,
258
+ monthKey,
259
+ provider,
260
+ sortKey,
261
+ isDataset: model.isDataset || false
262
+ };
263
+ }).sort((a, b) => {
264
+ // First sort by sortKey (year-month) in descending order
265
+ const dateCompare = b.sortKey.localeCompare(a.sortKey);
266
+ if (dateCompare !== 0) return dateCompare;
267
+ // Then by likes for items in the same month
268
+ return (b.likes || 0) - (a.likes || 0);
269
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  }
271
 
272
  // Helper function to get total monthly data across all providers