Skip to content
Snippets Groups Projects
Commit 32f7e92d authored by Ehsan's avatar Ehsan
Browse files

Make the query script ready, with arguments taken from the command line

parent d9dd0be9
Branches master
No related tags found
No related merge requests found
......@@ -399,7 +399,8 @@ def acquisition_download_links_to_csv(tile_id, output_file_name, acquisitions =
df_current["time"] = [acquisition['time_start'][11:-5]] * num_links
df_current["product"] = [acquisition['producer_granule_id'][0:7]] * num_links
df_current["tile"] = [tile_id] * num_links
if ifComplete: status = "complete"
if ifComplete and acquisition == acquisitions[-1]:
status = "complete"
df_current["status"] = [status] * num_links
df_current = df_current[df_columns]
df = pd.concat([df, df_current], ignore_index=True)
......
File moved
hls.py 100644 → 100755
#!/usr/bin/env python3
import os
import pandas as pd
import requests
......@@ -6,6 +7,7 @@ import matplotlib.pyplot as plt
from datetime import datetime
from shapely.geometry import Polygon
import xml.etree.ElementTree as ET
import argparse
from all_functions import parse_kml_to_tiles
from all_functions import search_hls_granules_by_tile
from all_functions import acquisition_download_links_to_csv
......@@ -13,19 +15,24 @@ from all_functions import plot_union_polygon
kml_file_path = './hls_tiles.kml'
output_file_name = "amazon-download-links.csv"
date_from = "2020-01-01"
date_to = "2024-12-31"
cloud_coverage_step = 10
def query_the_area (kml_file_path = './hls_tiles.kml', output_file_name_base = "amazon-download-links",
date_from = "2020-01-01", date_to = "2024-12-31", cloud_coverage_step = 10):
output_file_name = f"{output_file_name_base}_cloud-coverage-step-{cloud_coverage_step}.csv"
ifPlot = False
tile_id_list = list(pd.read_csv("amazon-dlr-tile-ids.csv", header=None)[0])
tile_id_list = list(pd.read_csv("amazon-tile-ids.csv", header=None)[0])
total_number_of_tiles = len(tile_id_list)
# tile_id_list = ["T17LRJ"]
# tile_id_list = ["T21MYS"]
# tile_id_list = ["T17MPS"]
tile_index = 0
if os.path.isfile(output_file_name): os.remove(output_file_name)
if os.path.isfile(output_file_name):
os.remove(output_file_name)
if os.path.isfile(output_file_name):
queried_tiles = pd.read_csv(output_file_name)
last_queried_tile = list(queried_tiles["tile"])[-1]
last_queried_tile_index = tile_id_list.index(last_queried_tile)
tile_id_list = tile_id_list[last_queried_tile_index:-1]
for tile_id in tile_id_list:
ifComplete = False
tile_index += 1
......@@ -33,10 +40,10 @@ for tile_id in tile_id_list:
polygon_surface_relative_diff_percent = 100
tile_polygon, tile_coords = parse_kml_to_tiles(kml_file_path, target_tile_id=tile_id)
# granules = search_hls_granules_by_tile(["HLSL30"], tile_id, date_from, date_to, cloud_coverage_threshold)
needed_number_of_acquisitions = 0
for cloud_coverage_threshold in range(0,101,cloud_coverage_step):
granules = search_hls_granules_by_tile(["HLSL30", "HLSS30"], tile_id, date_from, date_to, cloud_coverage_threshold, cloud_coverage_step)
union_polygon = Polygon([])
needed_number_of_acquisitions = 0
for acquisition in granules:
needed_number_of_acquisitions += 1
# print(acquisition['title'][15:-5])
......@@ -54,7 +61,7 @@ for tile_id in tile_id_list:
status_message = f"{tile_id}, cloud coverage: {cloud_coverage_threshold}, complete in {needed_number_of_acquisitions} acquisitions."
else:
status_message = f"{tile_id}, cloud coverage: {cloud_coverage_threshold}, incomplete in {needed_number_of_acquisitions} acquisitions."
print(status_message, counter_message, end="\r")
print(status_message, counter_message)#, end="\r")
# if polygon_surface_relative_diff_percent > 1e-1:
# status = f"{tile_id} is incomplete, surface diff: {polygon_surface_relative_diff_percent}%. Number of acquisitions: {len(granules)}."
# if ifPlot:
......@@ -62,9 +69,13 @@ for tile_id in tile_id_list:
acquisition_download_links_to_csv(tile_id, output_file_name, acquisitions=granules[0:needed_number_of_acquisitions], ifComplete=ifComplete)
if ifComplete:
break
# break
if __name__ == "__main__":
    # Command-line entry point: parse arguments and run the area query.
    parser = argparse.ArgumentParser(description="Query an area for HLS granules.")
    parser.add_argument("-k", "--kml-file", type=str, default="hls_tiles.kml",
                        help="Path to the KML file with the tile coordinates (default: hls_tiles.kml)")
    # Default was 30 but the help text, the module-level constant, and
    # query_the_area's own signature all use 10 — make them agree.
    parser.add_argument("-c", "--cloud-step", type=int, default=10,
                        help="Step size for the cloud coverage threshold (default: 10)")
    args = parser.parse_args()
    query_the_area(kml_file_path=args.kml_file, date_from="2020-01-01",
                   date_to="2024-12-31", cloud_coverage_step=args.cloud_step)
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment