diff --git a/all_functions.py b/all_functions.py index c9032369058ed5eaa5137be39936d0eca63bf63d..346863f36b1bf619cf191ec93bcc41c9993150f8 100644 --- a/all_functions.py +++ b/all_functions.py @@ -402,7 +402,7 @@ def plot_union_polygon(tile_id, union_polygon, tile_polygon): list_polygon.append(tile_polygon) draw_polygon(tile_id, list_polygon, line_style_list= ['b-', 'r--'], fill_style_list=['lightblue', 'white'], plotNow=False) -def completeness_check(df, tile_id= [], if_printout = True): +def tile_completeness_check_with_all_acquisitions(df, tile_id= [], if_printout = True): df_fmask = df.loc[df['download'].str.contains("FMask", case=False, na=False)] df_fmask.index = range(0,len(df_fmask.index)) df_acq_coords = df_fmask['acq_coords'] @@ -412,7 +412,7 @@ def completeness_check(df, tile_id= [], if_printout = True): union_polygon = Polygon([]) acquisitions_coords = [] if_complete = False - if_complete = "incomplete" + completeness_check = "incomplete" ids = [] for idx in df_fmask.index: cloud_coverage_current = df_fmask['cloud'].loc[idx] @@ -433,6 +433,51 @@ def completeness_check(df, tile_id= [], if_printout = True): date_from = min(df['date']) status_message = f"{tile_id}, from {date_from} to {date_to}, maximum cloud coverage: {cloud_coverage_max}, {completeness_check}!" if if_printout: print(status_message, end="\r") + if not if_complete: print(status_message) + return if_complete, ids, date_from, date_to, cloud_coverage_max + # plot_union_polygon(tile_id, union_polygon, tile_polygon) +def tile_completeness_check_with_two_acquisitions(df, tile_id= [], if_printout = True): + df_fmask = df.loc[df['download'].str.contains("FMask", case=False, na=False)] + df_fmask.index = range(0,len(df_fmask.index)) + df_acq_coords = df_fmask['acq_coords'] + tile_coords = ast.literal_eval(df_fmask['tile_coords'].loc[0]) + tile_polygon = Polygon(tile_coords) + cloud_coverage_max = 0 + completeness_check = "incomplete" + if_complete = False + ids = "" + for first_idx in range(0, len(df_fmask)-1): + ids = [df_fmask['id'].loc[first_idx]] + print("here: ", ids) + union_polygon = Polygon([]) + if_complete = False + cloud_coverage_first = df_fmask['cloud'].loc[first_idx] + for second_idx in range(first_idx,len(df_fmask)): + cloud_coverage_second = df_fmask['cloud'].loc[second_idx] + acq_coords = ast.literal_eval(df_fmask['acq_coords'].loc[second_idx]) + # coord_tmp = [[float(coord_tmp[i+1]),float(coord_tmp[i])] for i in range(0,int(len(coord_tmp)),2)] + acquisition_polygon = Polygon(acq_coords) + union_polygon = union_polygon.union(acquisition_polygon) + union_polygon = tile_polygon.intersection(union_polygon) + polygon_surface_relative_diff_percent = (tile_polygon.area - union_polygon.area) / tile_polygon.area*100 + if polygon_surface_relative_diff_percent < 1e-1: + if_complete = True + ids.append(df_fmask['id'].loc[second_idx]) + cloud_coverage_max = max(cloud_coverage_first, cloud_coverage_second) + print(cloud_coverage_max) + break + if if_complete: + print(cloud_coverage_max) + completeness_check = "complete" + break + + print(cloud_coverage_max) +# date_to = max(df_fmask['date'].loc[first_idx], df_fmask['date'].loc[second_idx]) +# date_from = min(df_fmask['date'].loc[first_idx], df_fmask['date'].loc[second_idx]) + date_from = "2020-01-01" + date_to = "2024-12-31" + status_message = f"{tile_id}, from {date_from} to {date_to}, maximum cloud coverage: {cloud_coverage_max}, {completeness_check}!" + if if_printout: print(status_message, end="\r") # print(status_message) return if_complete, ids, date_from, date_to, cloud_coverage_max # plot_union_polygon(tile_id, union_polygon, tile_polygon) @@ -530,7 +575,7 @@ def plot_histogram_of_tiles(time_interval_list, cloud_coverage_max_list, cloud_s # axs[0].set_title('Histogram of numbr of days') axs[0].hist(data, density=False, bins=num_bins, color='blue', edgecolor='blue') - axs[0].set_xlabel('Number of days') + axs[0].set_xlabel('Acquisition interval (months)') axs[0].set_ylabel('Number of tiles') data = cloud_coverage_max_list @@ -539,6 +584,6 @@ def plot_histogram_of_tiles(time_interval_list, cloud_coverage_max_list, cloud_s axs[1].hist(data, bins=num_bins, color='blue', edgecolor='blue') axs[1].set_xlabel('Cloud coverage (%)') axs[1].set_ylabel('Number of tiles') - fig.suptitle(f"Cloud coverage step size: {cloud_step_size} (%)", fontsize=16) +# fig.suptitle(f"Cloud coverage step size: {cloud_step_size} (%)", fontsize=16) # plt.show() fig.savefig(image_output_file, bbox_inches='tight') diff --git a/check_query_lists.py b/check_query_lists.py index f9fbcde78b43ac8a99522d7b58f61b87676b058b..79d9a56ed0785dbebb72119fac316b2f7e702dcb 100755 --- a/check_query_lists.py +++ b/check_query_lists.py @@ -5,7 +5,8 @@ import argparse import os from all_functions import plot_histogram_of_tiles from all_functions import filter_bands -from all_functions import completeness_check +from all_functions import filter_ids +from all_functions import tile_completeness_check_with_all_acquisitions as tile_completeness_check from all_functions import time_elapsed def analyze_query_list(cloud_coverage_step = 10): @@ -24,20 +25,26 @@ def analyze_query_list(cloud_coverage_step = 10): # for tile_id in tile_id_list: print(tile_id) df_selected = pd.DataFrame(columns=df.columns) df = df_band_filtered + print(f"Clodud step size:{cloud_coverage_step}, number of files:{len(df)}") time_interval_list = [] cloud_coverage_max_list = [] incomplete_tile_list = [] for tile_id in tile_id_list: df_tile = df.loc[df['tile'] == tile_id] - if_complete, ids, date_from, date_to, cloud_coverage_max_current = completeness_check(df_tile, tile_id, if_printout = False) - # df_tile_selected = df_tile[df_tile.apply(lambda row: filter_ids(row, ids), axis=1)] - # df_selected = pd.concat([df_selected, df_tile_selected], ignore_index=True) + if_complete, ids, date_from, date_to, cloud_coverage_max_current = tile_completeness_check(df_tile, tile_id, if_printout = False) + df_tile_selected = df_tile[df_tile.apply(lambda row: filter_ids(row, ids), axis=1)] + df_selected = pd.concat([df_selected, df_tile_selected], ignore_index=True) if not if_complete: incomplete_tile_list.append(tile_id) months_elapsed, days_elapsed = time_elapsed(date_from, date_to) time_interval_list.append(months_elapsed) cloud_coverage_max_list.append(cloud_coverage_max_current) - image_output_file = f"histogram_cloud-step-size-{cloud_coverage_step}.pdf" + image_output_dir = os.environ["HOME"]+"/git/jugit/3d-abc-slides/hls-download-pipeline/figs" + image_output_directory="~/git/jugit/3d-abc-slides/hls-download-pipeline/figs" + image_output_file = f"histogram-cloud-step-size-{cloud_coverage_step}.png" + if os.path.isdir(image_output_dir): + image_output_file = image_output_dir+"/"+image_output_file + df_selected.to_csv("final_"+input_file, header=True, index=False) plot_histogram_of_tiles(time_interval_list, cloud_coverage_max_list, cloud_coverage_step, image_output_file) if len(incomplete_tile_list) > 0: df_incomplete = pd.DataFrame(incomplete_tile_list, columns=['tile']) diff --git a/download_hls.py b/download_hls.py.old similarity index 100% rename from download_hls.py rename to download_hls.py.old