Commit 62d15ef3 authored by Ehsan

check the number of bands

parent 26cacaff
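The change adds a per-granule band-count check: after filtering, each HLS granule should contribute exactly seven download links (six spectral bands plus Fmask). A minimal sketch of that invariant, assuming the per-link CSV schema used in the diff below (`id`, `tile`, `date` columns); the names here are illustrative, the actual implementation is `check_if_bands_are_correct` further down.

# Sketch only: the invariant behind "check the number of bands".
# EXPECTED_FILES_PER_GRANULE and sketch_band_count_check are illustrative names.
EXPECTED_FILES_PER_GRANULE = 7  # six spectral bands plus Fmask

def sketch_band_count_check(df):
    # one warning per granule id whose filtered link count is not 7
    for granule_id, group in df.groupby("id"):
        if len(group) != EXPECTED_FILES_PER_GRANULE:
            print(f"{group['tile'].iloc[0]}-{group['date'].iloc[0]}: bands are not correct!")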
@@ -8,6 +8,7 @@
from datetime import datetime
import math
# from osgeo import gdal
# from pyproj import Proj
# from pystac_client import Client
@@ -363,30 +364,34 @@ def acquisition_download_links_to_csv(tile_id, output_file_name,
acq_idx = -1
for acquisition in acquisitions:
acq_idx += 1
for i in range(0,len(acquisition['links']),2):
if "https://data" in acquisition['links'][i]['href'] and "tif" in acquisition['links'][i]['href']:
download_links = download_links + [acquisition['links'][i]['href']]
link_list = [acquisition['links'][i]['href'] for i in range(len(acquisition['links']))]
df_current = pd.DataFrame(link_list, columns=["download"])
df_current = df_current.loc[df_current['download'].str.contains("https://data")]
df_current["product"] = [acquisition['producer_granule_id'][0:7]] * len(df_current)
df_current = df_current[df_current.apply(lambda row: filter_bands(row), axis=1)]
num_links = len(df_current)
# df_current["product"] = [acquisition['producer_granule_id'][0:7]] * num_links
# acq_idx += 1
# for i in range(0,len(acquisition['links']),2):
# if "https://data" in acquisition['links'][i]['href'] and "tif" in acquisition['links'][i]['href']:
# download_links = download_links + [acquisition['links'][i]['href']]
# download_links = download_links[0:2]
num_links = len(download_links)
df_current = pd.DataFrame(download_links, index=None, columns=["download"])
df_current["cloud"] = [acquisition['cloud_cover']] * num_links
df_current["id"] = [acquisition['producer_granule_id']] * num_links
df_current["date"] = [acquisition['time_start'][0:10]] * num_links
df_current["time"] = [acquisition['time_start'][11:-5]] * num_links
df_current["product"] = [acquisition['producer_granule_id'][0:7]] * num_links
df_current["tile"] = [tile_id] * num_links
df_current["acq_coords"] = "" * num_links
# print(len(acquisitions_coords), acq_idx)
# print(acquisitions_coords[acq_idx])
df_current["acq_coords"] = [acquisitions_coords[acq_idx]] * num_links
df_current["tile_coords"] =[tile_coords] * num_links
df_current.index = range(len(df_current))
if ifComplete and acquisition == acquisitions[-1]:
status = "complete"
df_current["status"] = [status] * num_links
df_current = df_current[df_columns]
df = pd.concat([df, df_current], ignore_index=True)
df_unique = df.drop_duplicates(subset=["download"], keep="first")
df = df_unique
# df_unique = df.drop_duplicates(subset=["download"], keep="first")
# df = df_unique
if True:
if os.path.isfile(output_file_name):
df.to_csv(output_file_name, mode = "a", header = False, index = False)
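Condensed, the new per-acquisition link handling introduced in this hunk works as sketched below: collect every link `href`, keep only links on the data host, tag the product prefix, and keep only the expected band files via `filter_bands` (defined later in this diff). This is a hedged sketch, not the exact function; the acquisition dict shape (`links`, `producer_granule_id`) is taken from the surrounding code.

import pandas as pd

def sketch_collect_band_links(acquisition):
    # All hrefs for this acquisition -> one-column frame of candidate downloads.
    hrefs = [link['href'] for link in acquisition['links']]
    df_current = pd.DataFrame(hrefs, columns=["download"])
    # Keep only links served from the data host.
    df_current = df_current.loc[df_current['download'].str.contains("https://data")]
    # Product prefix, e.g. "HLS.L30" or "HLS.S30", selects the band whitelist.
    df_current["product"] = acquisition['producer_granule_id'][0:7]
    # Drop anything that is not one of the expected bands (or Fmask).
    return df_current[df_current.apply(filter_bands, axis=1)]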
@@ -436,53 +441,59 @@ def tile_completeness_check_with_all_acquisitions(df, tile_id= [], if_printout =
if not if_complete: print(status_message)
return if_complete, ids, date_from, date_to, cloud_coverage_max
# plot_union_polygon(tile_id, union_polygon, tile_polygon)
def tile_completeness_check_with_two_acquisitions(df, tile_id= [], if_printout = True):
df_fmask = df.loc[df['download'].str.contains("FMask", case=False, na=False)]
df_fmask.index = range(0,len(df_fmask.index))
df_acq_coords = df_fmask['acq_coords']
tile_coords = ast.literal_eval(df_fmask['tile_coords'].loc[0])
tile_polygon = Polygon(tile_coords)
cloud_coverage_max = 0
completeness_check = "incomplete"
if_complete = False
ids = ""
for first_idx in range(0, len(df_fmask)-1):
ids = [df_fmask['id'].loc[first_idx]]
print("here: ", ids)
union_polygon = Polygon([])
if_complete = False
cloud_coverage_first = df_fmask['cloud'].loc[first_idx]
for second_idx in range(first_idx,len(df_fmask)):
cloud_coverage_second = df_fmask['cloud'].loc[second_idx]
acq_coords = ast.literal_eval(df_fmask['acq_coords'].loc[second_idx])
# coord_tmp = [[float(coord_tmp[i+1]),float(coord_tmp[i])] for i in range(0,int(len(coord_tmp)),2)]
acquisition_polygon = Polygon(acq_coords)
union_polygon = union_polygon.union(acquisition_polygon)
union_polygon = tile_polygon.intersection(union_polygon)
polygon_surface_relative_diff_percent = (tile_polygon.area - union_polygon.area) / tile_polygon.area*100
if polygon_surface_relative_diff_percent < 1e-1:
if_complete = True
ids.append(df_fmask['id'].loc[second_idx])
cloud_coverage_max = max(cloud_coverage_first, cloud_coverage_second)
print(cloud_coverage_max)
break
if if_complete:
print(cloud_coverage_max)
completeness_check = "complete"
break
print(cloud_coverage_max)
# def tile_completeness_check_with_two_acquisitions(df, tile_id= [], if_printout = True):
# df_fmask = df.loc[df['download'].str.contains("FMask", case=False, na=False)]
# df_fmask.index = range(0,len(df_fmask.index))
# df_acq_coords = df_fmask['acq_coords']
# tile_coords = ast.literal_eval(df_fmask['tile_coords'].loc[0])
# tile_polygon = Polygon(tile_coords)
# cloud_coverage_max = 0
# completeness_check = "incomplete"
# if_complete = False
# for first_idx in range(0, len(df_fmask)-1):
# ids = [df_fmask['id'].loc[first_idx]]
# union_polygon = Polygon([])
# if_complete = False
# cloud_coverage_first = df_fmask['cloud'].loc[first_idx]
# for second_idx in range(first_idx,len(df_fmask)):
# cloud_coverage_second = df_fmask['cloud'].loc[second_idx]
# acq_coords = ast.literal_eval(df_fmask['acq_coords'].loc[second_idx])
# acquisition_polygon = Polygon(acq_coords)
# union_polygon = union_polygon.union(acquisition_polygon)
# union_polygon = tile_polygon.intersection(union_polygon)
# polygon_surface_relative_diff_percent = (tile_polygon.area - union_polygon.area) / tile_polygon.area*100
# if polygon_surface_relative_diff_percent < 1e-1:
# if_complete = True
# ids.append(df_fmask['id'].loc[second_idx])
# cloud_coverage_max = max(cloud_coverage_first, cloud_coverage_second)
# break
# if if_complete:
# completeness_check = "complete"
# break
#
# date_to = max(df_fmask['date'].loc[first_idx], df_fmask['date'].loc[second_idx])
# date_from = min(df_fmask['date'].loc[first_idx], df_fmask['date'].loc[second_idx])
date_from = "2020-01-01"
date_to = "2024-12-31"
status_message = f"{tile_id}, from {date_from} to {date_to}, maximum cloud coverage: {cloud_coverage_max}, {completeness_check}!"
if if_printout: print(status_message, end="\r")
# print(status_message)
return if_complete, ids, date_from, date_to, cloud_coverage_max
# status_message = f"{tile_id}, from {date_from} to {date_to}, maximum cloud coverage: {cloud_coverage_max}, {completeness_check}!"
# if if_printout: print(status_message, end="\r")
# return if_complete, ids, date_from, date_to, cloud_coverage_max
# plot_union_polygon(tile_id, union_polygon, tile_polygon)
def filter_bands(row, bands):
def check_if_bands_are_correct(df):
ids = list(df['id'].unique())
for id in ids:
df_id = df.loc[df['id'] == id]
tile_id = list(df_id['tile'])[0]
tile_date = list(df_id['date'])[0]
# tile_id = df_id['tile']
if len(df_id) != 7:
print(f"{tile_id}-{tile_date}: bands are not correct!")
# raise ValueError(f"{tile_id}-{tile_date}: bands are not correct!")
def filter_bands(row):
bands = {
'l30': ["B02", "B03", "B04", "B05", "B06", "B07", "Fmask"],
's30': ["B02", "B03", "B04", "B8A", "B11", "B12", "Fmask"]
}
product_key = row["product"].split('.')[-1].lower() # Extract 's30' or 'l30'
if product_key in bands:
return any(b in row["download"] for b in bands[product_key])
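A small illustrative usage of the two helpers above; the URL, granule id, and tile in this snippet are made-up placeholders, not values from the repository.

import pandas as pd

rows = pd.DataFrame({
    "download": [
        "https://data.example/HLS.L30.T00XXX.2021001T000000.v2.0.B04.tif",  # kept: B04 is a listed L30 band
        "https://data.example/HLS.L30.T00XXX.2021001T000000.v2.0.B09.tif",  # dropped: B09 is not listed
    ],
    "product": ["HLS.L30", "HLS.L30"],
    "id": ["HLS.L30.T00XXX.2021001T000000.v2.0"] * 2,
    "tile": ["00XXX"] * 2,
    "date": ["2021-01-01"] * 2,
})

kept = rows[rows.apply(filter_bands, axis=1)]   # one row survives the band filter
check_if_bands_are_correct(kept)                # prints "00XXX-2021-01-01: bands are not correct!" (1 != 7)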
@@ -7,7 +7,9 @@ from all_functions import plot_histogram_of_tiles
from all_functions import filter_bands
from all_functions import filter_ids
from all_functions import tile_completeness_check_with_all_acquisitions as tile_completeness_check
# from all_functions import tile_completeness_check_with_two_acquisitions as tile_completeness_check
from all_functions import time_elapsed
from all_functions import check_if_bands_are_correct
def analyze_query_list(cloud_coverage_step = 10):
input_file = f"amazon-download-links_cloud-coverage-step-{cloud_coverage_step}.csv"
@@ -15,25 +17,24 @@ def analyze_query_list(cloud_coverage_step = 10):
print(f"There exists no such file as {input_file}")
return
df = pd.read_csv(input_file)
bands = {
'l30': ["B02", "B03", "B04", "B05", "B06", "B07", "Fmask"],
's30': ["B02", "B03", "B04", "B8A", "B11", "B12", "Fmask"]
}
df_band_filtered = df[df.apply(lambda row: filter_bands(row, bands), axis=1)]
df_sorted = df_band_filtered.loc[df_band_filtered['date'].sort_values().index]
# df_band_filtered = df[df.apply(lambda row: filter_bands(row), axis=1)]
# df_sorted = df_band_filtered.loc[df_band_filtered['date'].sort_values().index]
tile_id_list = list(df['tile'].unique())
# for tile_id in tile_id_list: print(tile_id)
df_selected = pd.DataFrame(columns=df.columns)
df = df_band_filtered
print(f"Clodud step size:{cloud_coverage_step}, number of files:{len(df)}")
# df_selected = pd.DataFrame(columns=df.columns)
# df = df_band_filtered
df_unique = df.drop_duplicates(subset=['download'])
# df = df_unique
print(f"Cloud step size:{cloud_coverage_step}, number of files:{len(df)}")
time_interval_list = []
cloud_coverage_max_list = []
incomplete_tile_list = []
for tile_id in tile_id_list:
df_tile = df.loc[df['tile'] == tile_id]
check_if_bands_are_correct(df_tile)
if_complete, ids, date_from, date_to, cloud_coverage_max_current = tile_completeness_check(df_tile, tile_id, if_printout = False)
df_tile_selected = df_tile[df_tile.apply(lambda row: filter_ids(row, ids), axis=1)]
df_selected = pd.concat([df_selected, df_tile_selected], ignore_index=True)
# df_tile_selected = df_tile[df_tile.apply(lambda row: filter_ids(row, ids), axis=1)]
# df_selected = pd.concat([df_selected, df_tile_selected], ignore_index=True)
if not if_complete:
incomplete_tile_list.append(tile_id)
months_elapsed, days_elapsed = time_elapsed(date_from, date_to)
@@ -44,7 +45,7 @@ def analyze_query_list(cloud_coverage_step = 10):
image_output_file = f"histogram-cloud-step-size-{cloud_coverage_step}.png"
if os.path.isdir(image_output_dir):
image_output_file = image_output_dir+"/"+image_output_file
df_selected.to_csv("final_"+input_file, header=True, index=False)
# df_unique.to_csv("final_"+input_file, header=True, index=False)
plot_histogram_of_tiles(time_interval_list, cloud_coverage_max_list, cloud_coverage_step, image_output_file)
if len(incomplete_tile_list) > 0:
df_incomplete = pd.DataFrame(incomplete_tile_list, columns=['tile'])
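Taken together, the revised per-tile pass in analyze_query_list has roughly the shape below. This is a condensed paraphrase for orientation, assuming the helpers imported above (check_if_bands_are_correct, tile_completeness_check), not additional behavior.

# Paraphrase of the revised per-tile pass (names match the imports above).
def sketch_per_tile_pass(df):
    incomplete_tiles = []
    for tile_id in df['tile'].unique():
        df_tile = df.loc[df['tile'] == tile_id]
        check_if_bands_are_correct(df_tile)      # new: warn on granules without all 7 band files
        if_complete, ids, date_from, date_to, cloud_max = tile_completeness_check(
            df_tile, tile_id, if_printout=False)
        if not if_complete:
            incomplete_tiles.append(tile_id)
    return incomplete_tiles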
File moved