# df_align / app copy.py
# Fer-geo's picture
# get gdf
# 4bdf2dd
import streamlit as st
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import geopandas as gpd
from difflib import get_close_matches
import tempfile
from io import BytesIO
def convert_to_gdf(uploaded_file):
    """Load an uploaded vector file through ``gpd.read_file``.

    The format is chosen from the extension of ``uploaded_file.name``. The
    comparison is case-insensitive, so ``PARCELS.SHP`` or ``lots.GeoJSON`` is
    handled the same way as its lowercase spelling.

    Args:
        uploaded_file: Object exposing a ``name`` string and a ``read()``
            method returning the file's bytes (presumably a Streamlit
            uploaded file -- confirm against the caller).

    Returns:
        Whatever ``gpd.read_file`` returns for the uploaded bytes.

    Raises:
        ValueError: If the extension is not ``.shp``, ``.geojson`` or
            ``.json``.
    """
    file_name = uploaded_file.name.lower()

    if file_name.endswith('.shp'):
        read_options = {}
    elif file_name.endswith(('.geojson', '.json')):
        read_options = {'driver': 'GeoJSON'}
    else:
        # Reject unknown formats before consuming the upload stream.
        raise ValueError("Unsupported file format")

    # NOTE(review): a lone .shp upload normally comes without its .shx/.dbf
    # sidecar files -- confirm that reading it from a buffer works for the
    # data this app is meant to receive.
    file_buffer = BytesIO(uploaded_file.read())
    return gpd.read_file(file_buffer, **read_options)
# Sidebar branding: logo, project title and usage notice.
st.sidebar.image('logo.jpg', width=200)
st.sidebar.title('Proyecto Crea')
st.sidebar.write('Solo uso interno')

# Two upload widgets (vector file and CSV); both are rendered disabled.
st.sidebar.header('Upload Files')
uploaded_file = st.sidebar.file_uploader(
    'Upload your shapefile',
    type=['shp', 'geojson', 'json'],
    disabled=True,
)
uploaded_file2 = st.sidebar.file_uploader(
    'Upload your csv file',
    type=['csv'],
    disabled=True,
)

# An uploaded vector file is converted and echoed on the page.
if uploaded_file is not None:
    lotes_gdf = convert_to_gdf(uploaded_file)
    st.write(lotes_gdf)

# An uploaded CSV becomes the observations dataframe.
if uploaded_file2 is not None:
    obs_df_2023 = pd.read_csv(uploaded_file2)

# Test mode is hard-wired on; the sidebar button that used to drive it is
# kept in the trailing comment.
test = True  # st.sidebar.button('Test')

# Make sure both session-state slots exist before anything reads them.
for _state_key in ('key', 'lote_gdf'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
# Test mode: work from the bundled sample shapefile and observations CSV.
if test:
    lotes_gdf = gpd.read_file('data/lotes espacio crea_empresa.shp', encoding='utf-8')
    obs_df_2023 = pd.read_csv('data/obs_df_2023_12_1.csv')

# Missing values become '-' and Campo is forced to text so it can be
# compared against names by the fuzzy matcher below.
obs_df_2023.fillna('-', inplace=True)
obs_df_2023.Campo = obs_df_2023.Campo.astype('str')
empresa_obs = obs_df_2023.EMPRESA.unique().tolist()

# Three-column layout: company selector, campo matching, lote matching.
col1, col2, col3 = st.columns(3)
with col1:
    st.header('EMPRESA')
    selected_company = st.selectbox('Seleccione empresa', empresa_obs, index=0)

# Keep only the observations of the selected company.
obs_df_2023 = obs_df_2023[obs_df_2023['EMPRESA'] == selected_company]

# Reuse the frame cached in session state only while it belongs to the
# company currently selected; otherwise a rerun after switching company
# would keep showing (and exporting) the previous company's lots.
_cached_gdf = st.session_state.get('lote_gdf')
if _cached_gdf is not None and st.session_state.get('lote_gdf_empresa') == selected_company:
    lotes_gdf = _cached_gdf
else:
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the full shapefile table.
    lotes_gdf = lotes_gdf[lotes_gdf['empresa'] == selected_company].copy()
    st.session_state['lote_gdf'] = lotes_gdf
    st.session_state['lote_gdf_empresa'] = selected_company

campo_obs = obs_df_2023.Campo.unique().tolist()
campo_gdf = lotes_gdf.campo.unique().tolist()

# Fuzzy-matching parameters: candidates requested per name and the minimum
# similarity ratio accepted by difflib.
N = 3
CUTOFF = 0.72

# Normalised copy of the observation names (lowercase, '.' -> ' '). It is
# index-aligned with campo_obs so a match can be mapped back to the original
# spelling, and it is built once because it does not depend on the loop.
campo_obs_norm = [str(c).lower().replace('.', ' ') for c in campo_obs]

# Map every shapefile campo to its closest observation campo.
similar_dict = {}
for item in campo_gdf:
    # Normalise the shapefile name the same way as the candidates so that
    # case and punctuation differences do not lower the similarity score.
    item_norm = str(item).lower().replace('.', ' ')
    similar_items = get_close_matches(item_norm, campo_obs_norm, N, CUTOFF)
    if similar_items:
        # Best candidate, reported in its original spelling.
        similar_dict[item] = campo_obs[campo_obs_norm.index(similar_items[0])]
    else:
        similar_dict[item] = "no match"

# Two-column table of (shapefile campo, matched observation campo). Built
# from the items so that an empty mapping still yields both columns.
similar_dict_df = pd.DataFrame(list(similar_dict.items()), columns=['gdf', 'obs'])

# 'no match' is offered as the first option of every campo dropdown.
campo_obs.insert(0, 'no match')
all_keys = similar_dict_df['gdf'].unique().tolist()

# Seed the per-lot campo_obs column with the automatic suggestions.
lotes_gdf['campo_obs'] = lotes_gdf['campo'].map(similar_dict)
cutoff = 0.3  # not referenced anywhere in the visible code
def on_click_field(*args):
    """Return a zero-argument callback that prints the captured arguments.

    Args:
        *args: Arbitrary positional values to capture.

    Returns:
        A callable taking no arguments; calling it prints the ``args`` tuple
        and returns ``None``.
    """
    return lambda: print(args)
def show_field(key):
    """Render one lot-matching selectbox per lot of a campo in column 3.

    For every distinct ``lote`` of the campo in ``lotes_gdf``, a selectbox
    offers the ``Lote`` values observed for the chosen ``Campo`` (plus
    ``'no match'``), pre-selecting the closest fuzzy match. The selection is
    written to the ``lote_obs`` column of the matching ``lotes_gdf`` rows.

    Args:
        key: Two-item sequence ``(campo, selected_value)``: the campo name as
            stored in ``lotes_gdf['campo']`` and the ``Campo`` value of
            ``obs_df_2023`` chosen for it.
    """
    campo, campo_value = key

    lote_obs = obs_df_2023[obs_df_2023['Campo'] == campo_value]['Lote'].unique().tolist()
    lote_obs.insert(0, 'no match')
    # difflib only compares strings; match on a text view that stays
    # index-aligned with lote_obs so non-string lot labels do not raise.
    lote_obs_text = [str(option) for option in lote_obs]

    with col3:
        st.header('Lote')
        df_field = lotes_gdf[lotes_gdf['campo'] == campo]
        fields = df_field['lote'].unique().tolist()

        # The on_change factory receives the previous selection; for the
        # first lot that is the campo value passed in.
        selected_value = campo_value
        for j, field in enumerate(fields):
            similar_items = get_close_matches(str(field), lote_obs_text, 3, 0.70)
            # Without a close match fall back to position 0 ('no match')
            # instead of searching the options for the literal 0.
            default_index = lote_obs_text.index(similar_items[0]) if similar_items else 0
            selected_value = st.selectbox(
                f'{field} (.shp):',
                lote_obs,
                index=default_index,
                key='field' + str(j),
                on_change=on_click_field(campo, field, selected_value),
            )
            lotes_gdf.loc[
                (lotes_gdf['campo'] == campo) & (lotes_gdf['lote'] == field),
                'lote_obs',
            ] = selected_value
def on_click(key):
    """Build the callback attached to a campo's 'Show Fields' button.

    Args:
        key: Value stored under ``st.session_state['key']`` and forwarded to
            ``show_field`` when the callback runs.

    Returns:
        A zero-argument callable that records ``key`` in session state and
        then calls ``show_field(key)``.
    """
    def _remember_and_show():
        st.session_state['key'] = key
        show_field(key)

    return _remember_and_show
with col2:
    st.header('Campo')
    for key in all_keys:
        # Automatic suggestion for this campo; it pre-selects the dropdown.
        suggested = similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'].values[0]
        selected_value = st.selectbox(
            f'{key}:',
            campo_obs,
            index=campo_obs.index(suggested),
        )

        # Use the user's choice; fall back to the suggestion as a plain
        # value (not a Series) if the selectbox yields nothing truthy.
        value = selected_value if selected_value else suggested

        similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'] = value
        lotes_gdf.loc[
            (lotes_gdf['campo'] == key) & (lotes_gdf['empresa'] == selected_company),
            'campo_obs',
        ] = value

        # The callback is built now, capturing this campo and its value.
        st.button('Show Fields', key=key, on_click=on_click([key, value]))

    # Persist the edited frame for the next rerun; once is enough because
    # the same object is updated throughout the loop.
    st.session_state['lote_gdf'] = lotes_gdf

# Offer the matched lots as a GeoJSON download named after the company.
st.sidebar.download_button(
    label="Download GeoJSON",
    data=lotes_gdf.to_json().encode('utf-8'),
    file_name=f'{selected_company}.geojson',
    mime='application/json',
)