"""Streamlit app that pairs shapefile campo/lote names with observation CSV names.

The script loads a lots GeoDataFrame and an observations table, lets the user
pick a company, proposes the closest observation ``Campo`` for every shapefile
``campo`` (fuzzy string matching), lets the user override each proposal, and
offers the annotated lots as a GeoJSON download.
"""
import streamlit as st
import warnings
# Kept ahead of the remaining imports, exactly as in the original ordering.
warnings.filterwarnings('ignore')
import pandas as pd
import geopandas as gpd
from difflib import get_close_matches
import tempfile
from io import BytesIO

# Placeholder option offered (and stored) when no observation name fits.
NO_MATCH = 'no match'


def convert_to_gdf(uploaded_file):
    """Load an uploaded vector file into a GeoDataFrame.

    Args:
        uploaded_file: Object exposing ``read()`` and ``name`` (the value
            returned by ``st.file_uploader``).

    Returns:
        The GeoDataFrame produced by ``gpd.read_file``.

    Raises:
        ValueError: If the file name does not end in ``.shp``, ``.geojson``
            or ``.json`` (compared case-insensitively).
    """
    file_buffer = BytesIO(uploaded_file.read())
    file_name = uploaded_file.name.lower()

    if file_name.endswith('.shp'):
        # NOTE(review): a lone .shp buffer has no .shx/.dbf sidecars; confirm
        # that reading it this way works for the files users will upload.
        return gpd.read_file(file_buffer)
    if file_name.endswith(('.geojson', '.json')):
        return gpd.read_file(file_buffer, driver='GeoJSON')
    raise ValueError("Unsupported file format")


# Sidebar branding. Logo source: D:\Terradot\repos\crea-carbon-model\app\logo.jpg
st.sidebar.image('logo.jpg', width=200)
st.sidebar.title('Proyecto Crea')
st.sidebar.write('Solo uso interno')

# Two upload widgets; both are currently disabled.
st.sidebar.header('Upload Files')
uploaded_file = st.sidebar.file_uploader(
    'Upload your shapefile', type=['shp', 'geojson', 'json'], disabled=True)
uploaded_file2 = st.sidebar.file_uploader(
    'Upload your csv file', type=['csv'], disabled=True)

if uploaded_file is not None:
    lotes_gdf = convert_to_gdf(uploaded_file)
    st.write(lotes_gdf)

if uploaded_file2 is not None:
    obs_df_2023 = pd.read_csv(uploaded_file2)

# Hard-wired test mode (a sidebar 'Test' button was used here previously).
test = True

# Session entries that must survive Streamlit reruns.
if 'key' not in st.session_state:
    st.session_state['key'] = None
if 'lote_gdf' not in st.session_state:
    st.session_state['lote_gdf'] = None
if 'empresa' not in st.session_state:
    st.session_state['empresa'] = None

# NOTE(review): the source layout was collapsed, so the original extent of
# this `if` is unknown; only the fixture loading is kept under it. With
# `test = True` the behaviour is the same either way.
if test:
    lotes_gdf = gpd.read_file('data/lotes espacio crea_empresa.shp', encoding='utf-8')
    obs_df_2023 = pd.read_csv('data/obs_df_2023_12_1.csv')

obs_df_2023.fillna('-', inplace=True)
obs_df_2023.Campo = obs_df_2023.Campo.astype('str')
empresa_obs = obs_df_2023.EMPRESA.unique().tolist()

col1, col2, col3 = st.columns(3)

with col1:
    st.header('EMPRESA')
    selected_company = st.selectbox('Seleccione empresa', empresa_obs, index=0)

# Restrict the observations to the chosen company.
obs_df_2023 = obs_df_2023[obs_df_2023['EMPRESA'] == selected_company]

# Reuse the lots cached in the session only while the company is unchanged;
# otherwise the previous company's lots would be shown for the new selection.
cached_lotes = st.session_state['lote_gdf']
if cached_lotes is not None and st.session_state['empresa'] == selected_company:
    lotes_gdf = cached_lotes
else:
    # .copy() so the column assignments below write to an independent frame.
    lotes_gdf = lotes_gdf[lotes_gdf['empresa'] == selected_company].copy()
    st.session_state['lote_gdf'] = lotes_gdf
    st.session_state['empresa'] = selected_company

campo_obs = obs_df_2023.Campo.unique().tolist()
campo_gdf = lotes_gdf.campo.unique().tolist()

# Fuzzy-matching parameters for campo names.
N = 3
CUTOFF = 0.72

# Lower-cased, dot-free variants of the observation names, computed once.
# Positions line up with `campo_obs`, so a hit maps back to the original text.
# NOTE(review): the shapefile name itself is matched as-is (not lower-cased).
campo_obs_norm = [str(c).lower().replace('.', ' ') for c in campo_obs]

# shapefile campo -> best observation Campo (or NO_MATCH).
similar_dict = {}
for item in campo_gdf:
    close = get_close_matches(item, campo_obs_norm, N, CUTOFF)
    if close:
        similar_dict[item] = campo_obs[campo_obs_norm.index(close[0])]
    else:
        similar_dict[item] = NO_MATCH

# Built from items() with explicit columns so an empty mapping (company with
# no campos) still yields a well-formed two-column frame.
similar_dict_df = pd.DataFrame(list(similar_dict.items()), columns=['gdf', 'obs'])

campo_obs.insert(0, NO_MATCH)
all_keys = similar_dict_df['gdf'].unique().tolist()

# Seed every lot with the suggested observation campo.
lotes_gdf['campo_obs'] = lotes_gdf['campo'].map(similar_dict)


def on_click_field(*args):
    """Return a zero-argument callback that prints ``args`` (debug aid)."""
    def inner():
        print(args)
    return inner


def show_field(key):
    """Render one lote selector per shapefile lot of a campo in the third column.

    Args:
        key: Two-item sequence ``(campo, campo_obs_value)``: the shapefile
            campo and the observation Campo currently paired with it.

    Each selector is pre-set to the closest observation ``Lote`` (or to
    'no match'), and the chosen value is written to the ``lote_obs`` column
    of the matching rows of ``lotes_gdf``.
    """
    campo, selected_value = key
    lote_obs = obs_df_2023[obs_df_2023['Campo'] == selected_value]['Lote'].unique().tolist()
    lote_obs.insert(0, NO_MATCH)
    with col3:
        st.header('Lote')
        campo_rows = lotes_gdf[lotes_gdf['campo'] == campo]
        for j, field in enumerate(campo_rows['lote'].unique().tolist()):
            close = get_close_matches(field, lote_obs, 3, 0.70)
            # Without a close match fall back to position 0 ('no match');
            # the previous `lote_obs.index(0)` raised ValueError here.
            default_index = lote_obs.index(close[0]) if close else 0
            # The debug callback is built with the value chosen before this
            # widget is evaluated, mirroring the original argument order.
            selected_value = st.selectbox(
                f'{field} (.shp):',
                lote_obs,
                index=default_index,
                key='field' + str(j),
                on_change=on_click_field(campo, field, selected_value),
            )
            lot_mask = (lotes_gdf['campo'] == campo) & (lotes_gdf['lote'] == field)
            lotes_gdf.loc[lot_mask, 'lote_obs'] = selected_value


def on_click(key):
    """Return a button callback that records ``key`` and renders its lots."""
    def inner():
        st.session_state['key'] = key
        show_field(key)
    return inner


with col2:
    st.header('Campo')
    for key in all_keys:
        suggested = similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'].values[0]
        selected_value = st.selectbox(
            f'{key}:', campo_obs, index=campo_obs.index(suggested))

        if selected_value:
            similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'] = selected_value
            value = selected_value
        else:
            # Scalar fallback; the earlier code bound a whole Series here.
            value = suggested

        campo_mask = (lotes_gdf['campo'] == key) & (lotes_gdf['empresa'] == selected_company)
        lotes_gdf.loc[campo_mask, 'campo_obs'] = value

        st.session_state['lote_gdf'] = lotes_gdf
        st.button('Show Fields', key=key, on_click=on_click([key, value]))

# Offer the annotated lots as a GeoJSON download.
st.sidebar.download_button(
    label="Download GeoJSON",
    data=lotes_gdf.to_json().encode('utf-8'),
    file_name=f'{selected_company}.geojson',
    mime='application/json',
)