HuanzhiMao committed on
Commit
9a12753
·
verified ·
1 Parent(s): aad7801

update leaderboard data

Browse files
Files changed (1) hide show
  1. app.py +87 -87
app.py CHANGED
@@ -630,23 +630,7 @@ COLUMNS = [
630
  DATA = [
631
  (
632
  1,
633
- 83.80,
634
- "GPT-4-0125-Preview",
635
- "OpenAI",
636
- "Proprietary",
637
- 82.18,
638
- 90.00,
639
- 90.00,
640
- 91.00,
641
- 54.12,
642
- 70.00,
643
- 76.00,
644
- 55.00,
645
- 87.50,
646
- ),
647
- (
648
- 2,
649
- 83.55,
650
  "GPT-4-1106-Preview",
651
  "OpenAI",
652
  "Proprietary",
@@ -654,15 +638,31 @@ DATA = [
654
  89.50,
655
  92.00,
656
  92.00,
657
- 53.53,
658
  62.00,
659
  72.00,
660
  50.00,
661
  88.75,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
662
  ),
663
  (
664
  3,
665
- 83.55,
666
  "Gorilla-OpenFunctions-v2",
667
  "Gorilla LLM",
668
  "Apache 2.0",
@@ -670,7 +670,7 @@ DATA = [
670
  89.50,
671
  79.50,
672
  78.00,
673
- 78.82,
674
  74.00,
675
  76.00,
676
  60.00,
@@ -678,7 +678,7 @@ DATA = [
678
  ),
679
  (
680
  4,
681
- 81.63,
682
  "GPT-3.5-Turbo",
683
  "OpenAI",
684
  "Proprietary",
@@ -686,7 +686,7 @@ DATA = [
686
  88.00,
687
  87.50,
688
  88.00,
689
- 74.12,
690
  74.00,
691
  70.00,
692
  47.50,
@@ -694,7 +694,7 @@ DATA = [
694
  ),
695
  (
696
  5,
697
- 79.46,
698
  "Mistral-Medium",
699
  "Mistral AI",
700
  "Proprietary",
@@ -702,7 +702,7 @@ DATA = [
702
  84.50,
703
  71.00,
704
  68.00,
705
- 75.88,
706
  72.00,
707
  62.00,
708
  47.50,
@@ -710,7 +710,7 @@ DATA = [
710
  ),
711
  (
712
  6,
713
- 75.78,
714
  "Claude-2.1",
715
  "Anthropic",
716
  "Proprietary",
@@ -718,7 +718,7 @@ DATA = [
718
  83.00,
719
  72.00,
720
  56.50,
721
- 61.18,
722
  48.00,
723
  60.00,
724
  45.00,
@@ -726,7 +726,7 @@ DATA = [
726
  ),
727
  (
728
  7,
729
- 59.52,
730
  "Mistral-tiny",
731
  "Mistral AI",
732
  "Proprietary",
@@ -734,7 +734,7 @@ DATA = [
734
  59.50,
735
  53.50,
736
  41.50,
737
- 58.24,
738
  64.00,
739
  42.00,
740
  40.00,
@@ -742,7 +742,7 @@ DATA = [
742
  ),
743
  (
744
  8,
745
- 59.22,
746
  "Claude-instant",
747
  "Anthropic",
748
  "Proprietary",
@@ -750,7 +750,7 @@ DATA = [
750
  59.00,
751
  53.00,
752
  39.50,
753
- 51.76,
754
  52.00,
755
  50.00,
756
  37.50,
@@ -758,7 +758,23 @@ DATA = [
758
  ),
759
  (
760
  9,
761
- 55.84,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762
  "Mistral-large",
763
  "Mistral AI",
764
  "Proprietary",
@@ -766,31 +782,31 @@ DATA = [
766
  90.50,
767
  4.00,
768
  0.00,
769
- 61.76,
770
  66.00,
771
  0.00,
772
  5.00,
773
  84.58,
774
  ),
775
  (
776
- 10,
777
- 54.99,
778
- "Gemini-1.0-Pro",
779
- "Google",
780
  "Proprietary",
781
- 78.43,
782
- 89.00,
783
- 4.00,
784
- 0.00,
785
- 46.12,
786
- 62.00,
787
- 0.00,
788
- 0.00,
789
- 77.50,
790
  ),
791
  (
792
- 11,
793
- 54.46,
794
  "Nexusflow-Raven-v2",
795
  "Nexusflow",
796
  "Apache 2.0",
@@ -798,15 +814,15 @@ DATA = [
798
  83.50,
799
  39.50,
800
  34.00,
801
- 45.88,
802
  78.00,
803
  68.00,
804
  45.00,
805
  0.00,
806
  ),
807
  (
808
- 12,
809
- 53.95,
810
  "FireFunction-v1",
811
  "Fireworks",
812
  "Apache 2.0",
@@ -814,31 +830,15 @@ DATA = [
814
  87.00,
815
  4.00,
816
  0.00,
817
- 48.24,
818
  64.00,
819
  0.00,
820
  5.00,
821
  81.25,
822
  ),
823
- (
824
- 13,
825
- 53.86,
826
- "Mistral-small",
827
- "Mistral AI",
828
- "Proprietary",
829
- 46.55,
830
- 68.00,
831
- 48.50,
832
- 58.00,
833
- 14.12,
834
- 30.00,
835
- 40.00,
836
- 37.50,
837
- 89.58,
838
- ),
839
  (
840
  14,
841
- 53.49,
842
  "GPT-4-0613",
843
  "OpenAI",
844
  "Proprietary",
@@ -846,7 +846,7 @@ DATA = [
846
  86.00,
847
  4.00,
848
  0.00,
849
- 37.65,
850
  50.00,
851
  0.00,
852
  0.00,
@@ -854,23 +854,7 @@ DATA = [
854
  ),
855
  (
856
  15,
857
- 44.46,
858
- "Gemma",
859
- "Google",
860
- "gemma-terms-of-use",
861
- 61.45,
862
- 60.00,
863
- 41.00,
864
- 32.00,
865
- 45.88,
866
- 46.00,
867
- 44.00,
868
- 25.00,
869
- 0.42,
870
- ),
871
- (
872
- 16,
873
- 43.19,
874
  "Deepseek-v1.5",
875
  "Deepseek",
876
  "Deepseek License",
@@ -878,12 +862,28 @@ DATA = [
878
  61.00,
879
  35.00,
880
  43.50,
881
- 5.29,
882
  2.00,
883
  0.00,
884
  7.50,
885
  66.25,
886
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
887
  (
888
  17,
889
  33.61,
 
630
  DATA = [
631
  (
632
  1,
633
+ 85.24,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634
  "GPT-4-1106-Preview",
635
  "OpenAI",
636
  "Proprietary",
 
638
  89.50,
639
  92.00,
640
  92.00,
641
+ 70.00,
642
  62.00,
643
  72.00,
644
  50.00,
645
  88.75,
646
+ ),
647
+ (
648
+ 2,
649
+ 85.12,
650
+ "GPT-4-0125-Preview",
651
+ "OpenAI",
652
+ "Proprietary",
653
+ 82.18,
654
+ 90.00,
655
+ 90.00,
656
+ 91.00,
657
+ 67.06,
658
+ 70.00,
659
+ 76.00,
660
+ 55.00,
661
+ 87.50,
662
  ),
663
  (
664
  3,
665
+ 83.67,
666
  "Gorilla-OpenFunctions-v2",
667
  "Gorilla LLM",
668
  "Apache 2.0",
 
670
  89.50,
671
  79.50,
672
  78.00,
673
+ 80.00,
674
  74.00,
675
  76.00,
676
  60.00,
 
678
  ),
679
  (
680
  4,
681
+ 82.23,
682
  "GPT-3.5-Turbo",
683
  "OpenAI",
684
  "Proprietary",
 
686
  88.00,
687
  87.50,
688
  88.00,
689
+ 80.00,
690
  74.00,
691
  70.00,
692
  47.50,
 
694
  ),
695
  (
696
  5,
697
+ 79.70,
698
  "Mistral-Medium",
699
  "Mistral AI",
700
  "Proprietary",
 
702
  84.50,
703
  71.00,
704
  68.00,
705
+ 78.24,
706
  72.00,
707
  62.00,
708
  47.50,
 
710
  ),
711
  (
712
  6,
713
+ 76.02,
714
  "Claude-2.1",
715
  "Anthropic",
716
  "Proprietary",
 
718
  83.00,
719
  72.00,
720
  56.50,
721
+ 63.53,
722
  48.00,
723
  60.00,
724
  45.00,
 
726
  ),
727
  (
728
  7,
729
+ 60.06,
730
  "Mistral-tiny",
731
  "Mistral AI",
732
  "Proprietary",
 
734
  59.50,
735
  53.50,
736
  41.50,
737
+ 63.53,
738
  64.00,
739
  42.00,
740
  40.00,
 
742
  ),
743
  (
744
  8,
745
+ 59.70,
746
  "Claude-instant",
747
  "Anthropic",
748
  "Proprietary",
 
750
  59.00,
751
  53.00,
752
  39.50,
753
+ 56.47,
754
  52.00,
755
  50.00,
756
  37.50,
 
758
  ),
759
  (
760
  9,
761
+ 56.79,
762
+ "Gemini-1.0-Pro",
763
+ "Google",
764
+ "Proprietary",
765
+ 78.43,
766
+ 89.00,
767
+ 4.00,
768
+ 0.00,
769
+ 63.77,
770
+ 62.00,
771
+ 0.00,
772
+ 0.00,
773
+ 77.50,
774
+ ),
775
+ (
776
+ 10,
777
+ 56.39,
778
  "Mistral-large",
779
  "Mistral AI",
780
  "Proprietary",
 
782
  90.50,
783
  4.00,
784
  0.00,
785
+ 67.06,
786
  66.00,
787
  0.00,
788
  5.00,
789
  84.58,
790
  ),
791
  (
792
+ 11,
793
+ 55.72,
794
+ "Mistral-small",
795
+ "Mistral AI",
796
  "Proprietary",
797
+ 46.55,
798
+ 68.00,
799
+ 48.50,
800
+ 58.00,
801
+ 32.35,
802
+ 30.00,
803
+ 40.00,
804
+ 37.50,
805
+ 89.58,
806
  ),
807
  (
808
+ 12,
809
+ 55.72,
810
  "Nexusflow-Raven-v2",
811
  "Nexusflow",
812
  "Apache 2.0",
 
814
  83.50,
815
  39.50,
816
  34.00,
817
+ 58.24,
818
  78.00,
819
  68.00,
820
  45.00,
821
  0.00,
822
  ),
823
  (
824
+ 13,
825
+ 55.33,
826
  "FireFunction-v1",
827
  "Fireworks",
828
  "Apache 2.0",
 
830
  87.00,
831
  4.00,
832
  0.00,
833
+ 61.76,
834
  64.00,
835
  0.00,
836
  5.00,
837
  81.25,
838
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
839
  (
840
  14,
841
+ 54.16,
842
  "GPT-4-0613",
843
  "OpenAI",
844
  "Proprietary",
 
846
  86.00,
847
  4.00,
848
  0.00,
849
+ 44.12,
850
  50.00,
851
  0.00,
852
  0.00,
 
854
  ),
855
  (
856
  15,
857
+ 45.18,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  "Deepseek-v1.5",
859
  "Deepseek",
860
  "Deepseek License",
 
862
  61.00,
863
  35.00,
864
  43.50,
865
+ 24.70,
866
  2.00,
867
  0.00,
868
  7.50,
869
  66.25,
870
  ),
871
+ (
872
+ 16,
873
+ 44.34,
874
+ "Gemma",
875
+ "Google",
876
+ "gemma-terms-of-use",
877
+ 61.45,
878
+ 60.00,
879
+ 41.00,
880
+ 32.00,
881
+ 44.71,
882
+ 46.00,
883
+ 44.00,
884
+ 25.00,
885
+ 0.42,
886
+ ),
887
  (
888
  17,
889
  33.61,