OxbridgeEconomics
committed on
Update utils.py
Browse files
utils.py
CHANGED
@@ -71,30 +71,30 @@ def update_reference(report):
|
|
71 |
)
|
72 |
print(response)
|
73 |
|
74 |
-
def download_files_from_s3(folder):
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
|
99 |
def extract_from_pdf_by_pattern(url, pattern):
|
100 |
"""
|
|
|
71 |
)
|
72 |
print(response)
|
73 |
|
74 |
+
# def download_files_from_s3(folder):
|
75 |
+
# """
|
76 |
+
# Downloads Parquet files from an S3 bucket and returns a concatenated DataFrame.
|
77 |
+
|
78 |
+
# Args:
|
79 |
+
# folder (str): The folder in the S3 bucket to download files from.
|
80 |
+
|
81 |
+
# Returns:
|
82 |
+
# pandas.DataFrame: A concatenated DataFrame containing the data from the downloaded Parquet files.
|
83 |
+
# """
|
84 |
+
# if not os.path.exists(folder):
|
85 |
+
# os.makedirs(folder)
|
86 |
+
# client = boto3.client(
|
87 |
+
# 's3',
|
88 |
+
# aws_access_key_id=AWS_ACCESS_KEY_ID,
|
89 |
+
# aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
|
90 |
+
# )
|
91 |
+
# response = client.list_objects_v2(Bucket='china-securities-report', Prefix=f"{folder}/")
|
92 |
+
# for obj in response['Contents']:
|
93 |
+
# key = obj['Key']
|
94 |
+
# if key.endswith('.parquet'):
|
95 |
+
# client.download_file('china-securities-report', key, key)
|
96 |
+
# file_paths = glob.glob(os.path.join(folder, '*.parquet'))
|
97 |
+
# return pd.concat([pd.read_parquet(file_path) for file_path in file_paths], ignore_index=True)
|
98 |
|
99 |
def extract_from_pdf_by_pattern(url, pattern):
|
100 |
"""
|