Spaces:

singhsidhukuldeep
/

posts_leaderboard

Sleeping

App Files Files Community

posts_leaderboard / app.py

singhsidhukuldeep

Update app.py

897f09c verified 4 months ago

raw

history blame contribute delete

3.1 kB

	import html
	from urllib.parse import quote

	import numpy as np
	import pandas as pd
	import streamlit as st
	from datasets import load_dataset

	# Use the full browser width so the leaderboard table has room.
	st.set_page_config(layout="wide")

	# Dataset pages offered in the "Data Source" selector; the first entry is
	# the default choice.
	sources = [
	    "https://huggingface.co/datasets/cfahlgren1/hub-stats",
	    "https://huggingface.co/datasets/maxiw/hf-posts",
	]

	# Top row: page title on the left, data-source selector on the right
	# (2:3 width ratio).
	title_col, source_col = st.columns([2, 3])

	with title_col:
	    st.header("HuggingFace 🤗 Posts leaderboard")

	with source_col:
	    selected_source = st.selectbox("Data Source:", options=sources, index=0)

	def _load_hub_stats():
	    """Read the hub-stats posts parquet and add the Name/username columns."""
	    frame = pd.read_parquet("hf://datasets/cfahlgren1/hub-stats/posts.parquet")
	    frame["Name"] = frame.fullname
	    frame["username"] = frame.name
	    return frame


	def _load_hf_posts():
	    """Read the hf-posts JSON-lines file and add the Name/username columns."""
	    frame = pd.read_json("hf://datasets/maxiw/hf-posts/posts.jsonl", lines=True)

	    frame["publishedAt"] = pd.to_datetime(frame.publishedAt)
	    print(">>> ", frame.columns)

	    # In this source the author is a nested mapping; pull the two fields out.
	    frame["Name"] = frame.author.apply(lambda author: author["fullname"])
	    frame["username"] = frame.author.apply(lambda author: author["name"])
	    return frame


	# Primary source. Any failure is reported in the UI and the selection is
	# switched to the second source, which the next `if` then picks up.
	if selected_source == sources[0]:
	    try:
	        df = _load_hub_stats()
	    except Exception as exp:
	        st.error(f'''
	ERROR>> in loading {selected_source}

	>> {exp}''', icon="🚨")
	        selected_source = sources[1]
	        st.info(f'''
	This can be solved by "Space Restart"

	Switching Sources for now...

	New Source: {selected_source}''', icon="ℹ️")

	# Deliberately a separate `if` (not `elif`): it must also run after the
	# fallback above has reassigned `selected_source`.
	if selected_source == sources[1]:
	    df = _load_hf_posts()

	# Columns the leaderboard can be ranked by.
	metrics = ["totalUniqueImpressions", "totalReactions", "numComments", "Num of posts"]

	# Earliest and latest publication timestamps, converted to plain Python
	# datetimes for use as the slider bounds.
	published = df["publishedAt"]
	min_date = published.min().to_pydatetime()
	max_date = published.max().to_pydatetime()

	# Second row: wide date-range slider next to a narrow "Sort by" selector
	# (3:1 width ratio).
	slider_col, sort_col = st.columns([3, 1])

	with slider_col:
	    # Defaults to the full available range.
	    date_range = st.slider(
	        "Select Date Range",
	        min_value=min_date,
	        max_value=max_date,
	        value=(min_date, max_date),
	        format="DD/MMM/YYYY",
	    )

	with sort_col:
	    selected_metric = st.selectbox("Sort by:", options=metrics, index=0)


	# Keep only the posts published inside the selected date range. `.copy()`
	# makes the filtered frame independent, so the column assignments below do
	# not write into a slice of the unfiltered frame (SettingWithCopyWarning).
	mask = df["publishedAt"].between(*date_range)
	df = df[mask].copy()

	# Each `reactions` entry is iterated as a collection of mappings with a
	# "count" value; add them up to get one total per post.
	df["totalReactions"] = df.reactions.apply(
	    lambda reactions: sum(reaction["count"] for reaction in reactions)
	)
	# One row per post, so summing this column per author counts their posts.
	df["Num of posts"] = 1

	# Ensure metrics columns are integers, handling NaN values
	df[metrics] = df[metrics].fillna(0).astype(int)

	# Aggregate per author and order by the metric chosen in the UI. Sorting
	# happens here, while the values are still numeric.
	data = (
	    df.groupby(["username", "Name"])[metrics]
	    .sum()
	    .sort_values(selected_metric, ascending=False)
	    .reset_index()
	)
	# 1-based rank instead of the default 0-based index.
	data.index = np.arange(1, len(data) + 1)
	data.index.name = "Rank"

	# Format metrics columns with thousands separators (display only).
	# Per-column `Series.map` replaces `DataFrame.applymap`, which is deprecated
	# in recent pandas, and works on older versions as well.
	for metric in metrics:
	    data[metric] = data[metric].map("{:,}".format)


	def make_clickable(val):
	    """Return an HTML anchor linking to the Hugging Face profile of *val*.

	    The value is percent-encoded for the URL path and HTML-escaped for the
	    link text, because the result is rendered as raw HTML. Values made only
	    of letters, digits, ``-``, ``_``, ``.`` and ``~`` produce exactly the
	    same markup as before.

	    Args:
	        val: The username to link to; converted with ``str()``.

	    Returns:
	        An ``<a target="_blank" ...>`` element as a string.
	    """
	    handle = str(val)
	    # safe="" also encodes "/", so the value stays a single path segment.
	    href = f"https://huggingface.co/{quote(handle, safe='')}"
	    label = html.escape(handle)
	    return f'<a target="_blank" href="{href}">{label}</a>'


	# Render the leaderboard as raw HTML so the profile links stay clickable.
	# "Name" comes straight from the loaded dataset and would otherwise be
	# embedded verbatim, so the Styler HTML-escapes that column. Escaping is
	# limited to "Name" so the anchor markup built for "username" is untouched.
	df_styled = (
	    data.style
	    .format(make_clickable, subset=["username"])
	    .format(escape="html", subset=["Name"])
	)
	# NOTE(review): `escape` and `index` are not named parameters of
	# Styler.to_html; confirm whether they have any effect before relying on them.
	st.write(
	    f"""<center>{df_styled.to_html(escape=False, index=False)}</center>""",
	    unsafe_allow_html=True,
	)