ZakoST committed on
Commit
f599167
·
1 Parent(s): 4246107
Files changed (2) hide show
  1. app.py +1 -0
  2. execution.ipynb +483 -0
app.py CHANGED
@@ -30,6 +30,7 @@ def initialize(name):
30
  articles = pd.merge(df, articles, on='item_id', how='left')
31
 
32
  count_principale = df.groupby('categorie_principale').item_id.count()
 
33
  df['categorie_secondaire'] = df.apply(lambda x : x.categorie_secondaire.split(',')[0] if x.categorie_secondaire!=None else None, axis=1)
34
  count_secondaire = df.groupby('categorie_secondaire').item_id.count()
35
  display_principale = count_principale.reset_index()
 
30
  articles = pd.merge(df, articles, on='item_id', how='left')
31
 
32
  count_principale = df.groupby('categorie_principale').item_id.count()
33
+ print(f"Name : {name}\n Data : {df}")
34
  df['categorie_secondaire'] = df.apply(lambda x : x.categorie_secondaire.split(',')[0] if x.categorie_secondaire!=None else None, axis=1)
35
  count_secondaire = df.groupby('categorie_secondaire').item_id.count()
36
  display_principale = count_principale.reset_index()
execution.ipynb CHANGED
@@ -710,6 +710,489 @@
710
  " retrieve_classifications(name, mapping)"
711
  ]
712
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
  {
714
  "cell_type": "markdown",
715
  "metadata": {},
 
710
  " retrieve_classifications(name, mapping)"
711
  ]
712
  },
713
+ {
714
+ "cell_type": "code",
715
+ "execution_count": 38,
716
+ "metadata": {},
717
+ "outputs": [],
718
+ "source": [
719
+ "articles = pd.read_csv('data/extract_sciences_po.csv')\n",
720
+ "\n",
721
+ "with open(\"data/outputs/output_favarel_et_al.txt\", 'r') as f : \n",
722
+ " out_dict = json.loads(f.read())\n",
723
+ "\n",
724
+ "\n",
725
+ "df = pd.DataFrame.from_dict(out_dict)\n",
726
+ "\n",
727
+ "articles = pd.merge(df, articles, on='item_id', how='left')\n",
728
+ "\n",
729
+ "count_principale = df.groupby('categorie_principale').item_id.count()\n",
730
+ "df['categorie_secondaire'] = df.apply(lambda x : x.categorie_secondaire.split(',')[0] if x.categorie_secondaire!=None else None, axis=1)\n",
731
+ "\n",
732
+ " "
733
+ ]
734
+ },
735
+ {
736
+ "cell_type": "code",
737
+ "execution_count": 40,
738
+ "metadata": {},
739
+ "outputs": [
740
+ {
741
+ "data": {
742
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
743
+ "columns": [
744
+ {
745
+ "name": "index",
746
+ "rawType": "int64",
747
+ "type": "integer"
748
+ },
749
+ {
750
+ "name": "item_id",
751
+ "rawType": "object",
752
+ "type": "string"
753
+ },
754
+ {
755
+ "name": "categorie_principale",
756
+ "rawType": "object",
757
+ "type": "string"
758
+ },
759
+ {
760
+ "name": "categorie_secondaire",
761
+ "rawType": "object",
762
+ "type": "string"
763
+ }
764
+ ],
765
+ "conversionMethod": "pd.DataFrame",
766
+ "ref": "224a4e83-124d-4710-9d6a-6deb122e17de",
767
+ "rows": [
768
+ [
769
+ "0",
770
+ "I4OEKQ6MHRBP3LQVVYDDXW6T6U",
771
+ "UPDATE ME",
772
+ "EDUCATE ME"
773
+ ],
774
+ [
775
+ "1",
776
+ "LVA4MZBQOBECNPZD323NV6O7K4",
777
+ "INSPIRE ME",
778
+ "EDUCATE ME"
779
+ ],
780
+ [
781
+ "2",
782
+ "4FAEHUUZ5ZFAJKLFEV2LT5CBAQ",
783
+ "EDUCATE ME",
784
+ "GIVE ME PERSPECTIVE"
785
+ ],
786
+ [
787
+ "3",
788
+ "4S4G6BKFRNER3LB22CLPAEWWKY",
789
+ "GIVE ME PERSPECTIVE",
790
+ "INSPIRE ME"
791
+ ],
792
+ [
793
+ "4",
794
+ "ZAFHRNAHJVC6THXRSBMCB4A24I",
795
+ "INSPIRE ME",
796
+ "EDUCATE ME"
797
+ ],
798
+ [
799
+ "5",
800
+ "ESEADAHDJVGQRC2S3JHSZEVWHM",
801
+ "UPDATE ME",
802
+ "GIVE ME PERSPECTIVE"
803
+ ],
804
+ [
805
+ "6",
806
+ "FZHZS32EOZG5FAK7IKOJIB3J4I",
807
+ "EDUCATE ME",
808
+ "GIVE ME PERSPECTIVE"
809
+ ],
810
+ [
811
+ "7",
812
+ "H4JJGHBTL5DDFNNNETBE5T7FLI",
813
+ "UPDATE ME",
814
+ "EDUCATE ME"
815
+ ],
816
+ [
817
+ "8",
818
+ "JTND25UK2NA6HL2DVJRX46VCVM",
819
+ "EDUCATE ME",
820
+ "GIVE ME PERSPECTIVE"
821
+ ],
822
+ [
823
+ "9",
824
+ "LFRXBXICUFFULNKZUG4JN4E2NY",
825
+ "INSPIRE ME",
826
+ "EDUCATE ME"
827
+ ],
828
+ [
829
+ "10",
830
+ "O5546IGISVGOLAPLOUIXAHXATE",
831
+ "EDUCATE ME",
832
+ "GIVE ME PERSPECTIVE"
833
+ ],
834
+ [
835
+ "11",
836
+ "5LRGJN3CONBZ3OIGARTGZX7X34",
837
+ "UPDATE ME",
838
+ "GIVE ME PERSPECTIVE"
839
+ ],
840
+ [
841
+ "12",
842
+ "A2CXALUPDVHDZP22PBL4YMMGVU",
843
+ "EDUCATE ME",
844
+ "GIVE ME PERSPECTIVE"
845
+ ],
846
+ [
847
+ "13",
848
+ "6T5ESGV6CNFA5L3TIEO475EO2I",
849
+ "GIVE ME PERSPECTIVE",
850
+ "EDUCATE ME"
851
+ ],
852
+ [
853
+ "14",
854
+ "KGJGC5V6OJCJJEVO4D3MP5QA4Q",
855
+ "EDUCATE ME",
856
+ "GIVE ME PERSPECTIVE"
857
+ ],
858
+ [
859
+ "15",
860
+ "ASBHDD73HZANDM5W3WRKXGCCGI",
861
+ "KEEP ME ON TREND",
862
+ "EDUCATE ME"
863
+ ],
864
+ [
865
+ "16",
866
+ "YL7UVGJLQZALVK5PCV2LZLPTDQ",
867
+ "INSPIRE ME",
868
+ "ENTERTAIN ME"
869
+ ],
870
+ [
871
+ "17",
872
+ "2T6XZA7OHRD6RFCGPA3ZVCNJ7A",
873
+ "GIVE ME PERSPECTIVE",
874
+ "EDUCATE ME"
875
+ ],
876
+ [
877
+ "18",
878
+ "V3THEIGVXBDADLDXUKJZTRLLUQ",
879
+ "UPDATE ME",
880
+ "GIVE ME PERSPECTIVE"
881
+ ],
882
+ [
883
+ "19",
884
+ "Z3WQ6BXPKRATPCGSKMROOOMQ44",
885
+ "UPDATE ME",
886
+ "EDUCATE ME"
887
+ ],
888
+ [
889
+ "20",
890
+ "TUR55MJTKRA7TNJOOP62XW7NIM",
891
+ "INSPIRE ME",
892
+ "EDUCATE ME"
893
+ ],
894
+ [
895
+ "21",
896
+ "YGI5VTCGIJFDNOA5C4G5BOITH4",
897
+ "EDUCATE ME",
898
+ "GIVE ME PERSPECTIVE"
899
+ ],
900
+ [
901
+ "22",
902
+ "S2FVMMUVK5BS3HK5YN7PNU2LZE",
903
+ "UPDATE ME",
904
+ "GIVE ME PERSPECTIVE"
905
+ ],
906
+ [
907
+ "23",
908
+ "ZASS6FXGGVGCHFSSZKZXQ4AG2U",
909
+ "EDUCATE ME",
910
+ "INSPIRE ME"
911
+ ],
912
+ [
913
+ "24",
914
+ "2AZTSRRBUNBHNNHU5DCMRGG6WY",
915
+ "GIVE ME PERSPECTIVE",
916
+ "EDUCATE ME"
917
+ ],
918
+ [
919
+ "25",
920
+ "AKDZ7PAIQVBN5OENKR5AQ6YGNI",
921
+ "KEEP ME ON TREND",
922
+ "ENTERTAIN ME"
923
+ ],
924
+ [
925
+ "26",
926
+ "M4RJIWTMK5BVPAWNFEHGXZSLRY",
927
+ "INSPIRE ME",
928
+ "ENTERTAIN ME"
929
+ ],
930
+ [
931
+ "27",
932
+ "WLTP7ZAZ7JFUDJG2LNO5E7APZA",
933
+ "EDUCATE ME",
934
+ "GIVE ME PERSPECTIVE"
935
+ ],
936
+ [
937
+ "28",
938
+ "DCY4O24FUBAZZB3KOTIMMBA6MY",
939
+ "EDUCATE ME",
940
+ "GIVE ME PERSPECTIVE"
941
+ ],
942
+ [
943
+ "29",
944
+ "FHFB7Z3XYBH25LO7XDGHAEQ7N4",
945
+ "UPDATE ME",
946
+ "EDUCATE ME"
947
+ ],
948
+ [
949
+ "30",
950
+ "BPNRZFZPAVA3HPUOXUKXEUR5RY",
951
+ "EDUCATE ME",
952
+ "GIVE ME PERSPECTIVE"
953
+ ],
954
+ [
955
+ "31",
956
+ "NWGRYEOIQ5ADRDRXYZCU5C3VDA",
957
+ "INSPIRE ME",
958
+ "EDUCATE ME"
959
+ ],
960
+ [
961
+ "32",
962
+ "GORDMTE47FA5FEUA3U76M5VPOY",
963
+ "EDUCATE ME",
964
+ "UPDATE ME"
965
+ ],
966
+ [
967
+ "33",
968
+ "XFBX7NDGN5CMPL3OU5ZXFF6YCE",
969
+ "UPDATE ME",
970
+ "EDUCATE ME"
971
+ ],
972
+ [
973
+ "34",
974
+ "VYEPVCBK4ZCV7D3SW35GZ2Y5DM",
975
+ "EDUCATE ME",
976
+ "INSPIRE ME"
977
+ ],
978
+ [
979
+ "35",
980
+ "F5TBC6SGHRGRFJGZXZYG73I2C4",
981
+ "EDUCATE ME",
982
+ "UPDATE ME"
983
+ ],
984
+ [
985
+ "36",
986
+ "ZFZG6QOFBRFE3BL4VCI72HFQKQ",
987
+ "GIVE ME PERSPECTIVE",
988
+ "EDUCATE ME"
989
+ ],
990
+ [
991
+ "37",
992
+ "ZGPFATVT4ZFJZMX7LDXE3VS7RM",
993
+ "INSPIRE ME",
994
+ "GIVE ME PERSPECTIVE"
995
+ ],
996
+ [
997
+ "38",
998
+ "UBVFCLZ6OZCP5DAAYCBW2CDPJI",
999
+ "EDUCATE ME",
1000
+ "GIVE ME PERSPECTIVE"
1001
+ ],
1002
+ [
1003
+ "39",
1004
+ "WYEBGIAY5JAUFLB4GH6IXVYP34",
1005
+ "UPDATE ME",
1006
+ "EDUCATE ME"
1007
+ ],
1008
+ [
1009
+ "40",
1010
+ "DXW7WX7ALVFBZNPFU6QKHEH3G4",
1011
+ "UPDATE ME",
1012
+ "EDUCATE ME"
1013
+ ],
1014
+ [
1015
+ "41",
1016
+ "QOFNZRU2TVES7GMVVPMZ5LSI2E",
1017
+ "EDUCATE ME",
1018
+ "INSPIRE ME"
1019
+ ],
1020
+ [
1021
+ "42",
1022
+ "WBJI7FFINZHCPC5QZL2NFY6XWE",
1023
+ "INSPIRE ME",
1024
+ "EDUCATE ME"
1025
+ ],
1026
+ [
1027
+ "43",
1028
+ "IYAAHFU3PVFOPAN6XIJDPQ5XLQ",
1029
+ "UPDATE ME",
1030
+ "EDUCATE ME"
1031
+ ],
1032
+ [
1033
+ "44",
1034
+ "SNYUYV4G5BAODBTQWJBRG5JZKM",
1035
+ "EDUCATE ME",
1036
+ "GIVE ME PERSPECTIVE"
1037
+ ],
1038
+ [
1039
+ "45",
1040
+ "PMBTTRISTNDN3P3ANDECQKSEDE",
1041
+ "EDUCATE ME",
1042
+ "GIVE ME PERSPECTIVE"
1043
+ ],
1044
+ [
1045
+ "46",
1046
+ "UHYPJFKFJNHVPHVRFV7OHNDU64",
1047
+ "GIVE ME PERSPECTIVE",
1048
+ "EDUCATE ME"
1049
+ ],
1050
+ [
1051
+ "47",
1052
+ "ESRPEZYCA5AG7HIXUIVMPHDFVQ",
1053
+ "EDUCATE ME",
1054
+ "GIVE ME PERSPECTIVE"
1055
+ ],
1056
+ [
1057
+ "48",
1058
+ "HQW7TUOY7NHUVMCLNSDSEY5X4I",
1059
+ "KEEP ME ON TREND",
1060
+ "EDUCATE ME"
1061
+ ],
1062
+ [
1063
+ "49",
1064
+ "C3BA2XUBXRG5RBNHJUSDUKP4RI",
1065
+ "ENTERTAIN ME",
1066
+ "KEEP ME ON TREND"
1067
+ ]
1068
+ ],
1069
+ "shape": {
1070
+ "columns": 3,
1071
+ "rows": 516
1072
+ }
1073
+ },
1074
+ "text/html": [
1075
+ "<div>\n",
1076
+ "<style scoped>\n",
1077
+ " .dataframe tbody tr th:only-of-type {\n",
1078
+ " vertical-align: middle;\n",
1079
+ " }\n",
1080
+ "\n",
1081
+ " .dataframe tbody tr th {\n",
1082
+ " vertical-align: top;\n",
1083
+ " }\n",
1084
+ "\n",
1085
+ " .dataframe thead th {\n",
1086
+ " text-align: right;\n",
1087
+ " }\n",
1088
+ "</style>\n",
1089
+ "<table border=\"1\" class=\"dataframe\">\n",
1090
+ " <thead>\n",
1091
+ " <tr style=\"text-align: right;\">\n",
1092
+ " <th></th>\n",
1093
+ " <th>item_id</th>\n",
1094
+ " <th>categorie_principale</th>\n",
1095
+ " <th>categorie_secondaire</th>\n",
1096
+ " </tr>\n",
1097
+ " </thead>\n",
1098
+ " <tbody>\n",
1099
+ " <tr>\n",
1100
+ " <th>0</th>\n",
1101
+ " <td>I4OEKQ6MHRBP3LQVVYDDXW6T6U</td>\n",
1102
+ " <td>UPDATE ME</td>\n",
1103
+ " <td>EDUCATE ME</td>\n",
1104
+ " </tr>\n",
1105
+ " <tr>\n",
1106
+ " <th>1</th>\n",
1107
+ " <td>LVA4MZBQOBECNPZD323NV6O7K4</td>\n",
1108
+ " <td>INSPIRE ME</td>\n",
1109
+ " <td>EDUCATE ME</td>\n",
1110
+ " </tr>\n",
1111
+ " <tr>\n",
1112
+ " <th>2</th>\n",
1113
+ " <td>4FAEHUUZ5ZFAJKLFEV2LT5CBAQ</td>\n",
1114
+ " <td>EDUCATE ME</td>\n",
1115
+ " <td>GIVE ME PERSPECTIVE</td>\n",
1116
+ " </tr>\n",
1117
+ " <tr>\n",
1118
+ " <th>3</th>\n",
1119
+ " <td>4S4G6BKFRNER3LB22CLPAEWWKY</td>\n",
1120
+ " <td>GIVE ME PERSPECTIVE</td>\n",
1121
+ " <td>INSPIRE ME</td>\n",
1122
+ " </tr>\n",
1123
+ " <tr>\n",
1124
+ " <th>4</th>\n",
1125
+ " <td>ZAFHRNAHJVC6THXRSBMCB4A24I</td>\n",
1126
+ " <td>INSPIRE ME</td>\n",
1127
+ " <td>EDUCATE ME</td>\n",
1128
+ " </tr>\n",
1129
+ " <tr>\n",
1130
+ " <th>...</th>\n",
1131
+ " <td>...</td>\n",
1132
+ " <td>...</td>\n",
1133
+ " <td>...</td>\n",
1134
+ " </tr>\n",
1135
+ " <tr>\n",
1136
+ " <th>511</th>\n",
1137
+ " <td>AOT254SA2VDIDNF4YW7XPLWJ5E</td>\n",
1138
+ " <td>INSPIRE ME</td>\n",
1139
+ " <td>ENTERTAIN ME</td>\n",
1140
+ " </tr>\n",
1141
+ " <tr>\n",
1142
+ " <th>512</th>\n",
1143
+ " <td>GUOUKHLPFZBK7GVR5XU7MXVD5A</td>\n",
1144
+ " <td>INSPIRE ME</td>\n",
1145
+ " <td>EDUCATE ME</td>\n",
1146
+ " </tr>\n",
1147
+ " <tr>\n",
1148
+ " <th>513</th>\n",
1149
+ " <td>5HT6C24ZBVDOBFXPLA4HNVOTT4</td>\n",
1150
+ " <td>EDUCATE ME</td>\n",
1151
+ " <td>UPDATE ME</td>\n",
1152
+ " </tr>\n",
1153
+ " <tr>\n",
1154
+ " <th>514</th>\n",
1155
+ " <td>VLV6RSQ6U5E6XJ6AIRV26AEKO4</td>\n",
1156
+ " <td>UPDATE ME</td>\n",
1157
+ " <td>EDUCATE ME</td>\n",
1158
+ " </tr>\n",
1159
+ " <tr>\n",
1160
+ " <th>515</th>\n",
1161
+ " <td>FVCJ6DQ5HVDNDGC4F6F276NVFM</td>\n",
1162
+ " <td>UPDATE ME</td>\n",
1163
+ " <td>GIVE ME PERSPECTIVE</td>\n",
1164
+ " </tr>\n",
1165
+ " </tbody>\n",
1166
+ "</table>\n",
1167
+ "<p>516 rows × 3 columns</p>\n",
1168
+ "</div>"
1169
+ ],
1170
+ "text/plain": [
1171
+ " item_id categorie_principale categorie_secondaire\n",
1172
+ "0 I4OEKQ6MHRBP3LQVVYDDXW6T6U UPDATE ME EDUCATE ME\n",
1173
+ "1 LVA4MZBQOBECNPZD323NV6O7K4 INSPIRE ME EDUCATE ME\n",
1174
+ "2 4FAEHUUZ5ZFAJKLFEV2LT5CBAQ EDUCATE ME GIVE ME PERSPECTIVE\n",
1175
+ "3 4S4G6BKFRNER3LB22CLPAEWWKY GIVE ME PERSPECTIVE INSPIRE ME\n",
1176
+ "4 ZAFHRNAHJVC6THXRSBMCB4A24I INSPIRE ME EDUCATE ME\n",
1177
+ ".. ... ... ...\n",
1178
+ "511 AOT254SA2VDIDNF4YW7XPLWJ5E INSPIRE ME ENTERTAIN ME\n",
1179
+ "512 GUOUKHLPFZBK7GVR5XU7MXVD5A INSPIRE ME EDUCATE ME\n",
1180
+ "513 5HT6C24ZBVDOBFXPLA4HNVOTT4 EDUCATE ME UPDATE ME\n",
1181
+ "514 VLV6RSQ6U5E6XJ6AIRV26AEKO4 UPDATE ME EDUCATE ME\n",
1182
+ "515 FVCJ6DQ5HVDNDGC4F6F276NVFM UPDATE ME GIVE ME PERSPECTIVE\n",
1183
+ "\n",
1184
+ "[516 rows x 3 columns]"
1185
+ ]
1186
+ },
1187
+ "execution_count": 40,
1188
+ "metadata": {},
1189
+ "output_type": "execute_result"
1190
+ }
1191
+ ],
1192
+ "source": [
1193
+ "df"
1194
+ ]
1195
+ },
1196
  {
1197
  "cell_type": "markdown",
1198
  "metadata": {},