Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hls-download-pipeline
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Ehsan Zandi
hls-download-pipeline
Commits
26cacaff
Commit
26cacaff
authored
3 months ago
by
Ehsan
Browse files
Options
Downloads
Patches
Plain Diff
temp changes, it might need to be reverted
parent
d5a0cca7
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
all_functions.py
+49
-4
49 additions, 4 deletions
all_functions.py
check_query_lists.py
+12
-5
12 additions, 5 deletions
check_query_lists.py
download_hls.py.old
+0
-0
0 additions, 0 deletions
download_hls.py.old
with
61 additions
and
9 deletions
all_functions.py
+
49
−
4
View file @
26cacaff
...
@@ -402,7 +402,7 @@ def plot_union_polygon(tile_id, union_polygon, tile_polygon):
...
@@ -402,7 +402,7 @@ def plot_union_polygon(tile_id, union_polygon, tile_polygon):
list_polygon
.
append
(
tile_polygon
)
list_polygon
.
append
(
tile_polygon
)
draw_polygon
(
tile_id
,
list_polygon
,
line_style_list
=
[
'
b-
'
,
'
r--
'
],
fill_style_list
=
[
'
lightblue
'
,
'
white
'
],
plotNow
=
False
)
draw_polygon
(
tile_id
,
list_polygon
,
line_style_list
=
[
'
b-
'
,
'
r--
'
],
fill_style_list
=
[
'
lightblue
'
,
'
white
'
],
plotNow
=
False
)
def
completeness_check
(
df
,
tile_id
=
[],
if_printout
=
True
):
def
tile_
completeness_check
_with_all_acquisitions
(
df
,
tile_id
=
[],
if_printout
=
True
):
df_fmask
=
df
.
loc
[
df
[
'
download
'
].
str
.
contains
(
"
FMask
"
,
case
=
False
,
na
=
False
)]
df_fmask
=
df
.
loc
[
df
[
'
download
'
].
str
.
contains
(
"
FMask
"
,
case
=
False
,
na
=
False
)]
df_fmask
.
index
=
range
(
0
,
len
(
df_fmask
.
index
))
df_fmask
.
index
=
range
(
0
,
len
(
df_fmask
.
index
))
df_acq_coords
=
df_fmask
[
'
acq_coords
'
]
df_acq_coords
=
df_fmask
[
'
acq_coords
'
]
...
@@ -412,7 +412,7 @@ def completeness_check(df, tile_id= [], if_printout = True):
...
@@ -412,7 +412,7 @@ def completeness_check(df, tile_id= [], if_printout = True):
union_polygon
=
Polygon
([])
union_polygon
=
Polygon
([])
acquisitions_coords
=
[]
acquisitions_coords
=
[]
if_complete
=
False
if_complete
=
False
if_
complete
=
"
incomplete
"
complete
ness_check
=
"
incomplete
"
ids
=
[]
ids
=
[]
for
idx
in
df_fmask
.
index
:
for
idx
in
df_fmask
.
index
:
cloud_coverage_current
=
df_fmask
[
'
cloud
'
].
loc
[
idx
]
cloud_coverage_current
=
df_fmask
[
'
cloud
'
].
loc
[
idx
]
...
@@ -433,6 +433,51 @@ def completeness_check(df, tile_id= [], if_printout = True):
...
@@ -433,6 +433,51 @@ def completeness_check(df, tile_id= [], if_printout = True):
date_from
=
min
(
df
[
'
date
'
])
date_from
=
min
(
df
[
'
date
'
])
status_message
=
f
"
{
tile_id
}
, from
{
date_from
}
to
{
date_to
}
, maximum cloud coverage:
{
cloud_coverage_max
}
,
{
completeness_check
}
!
"
status_message
=
f
"
{
tile_id
}
, from
{
date_from
}
to
{
date_to
}
, maximum cloud coverage:
{
cloud_coverage_max
}
,
{
completeness_check
}
!
"
if
if_printout
:
print
(
status_message
,
end
=
"
\r
"
)
if
if_printout
:
print
(
status_message
,
end
=
"
\r
"
)
if
not
if_complete
:
print
(
status_message
)
return
if_complete
,
ids
,
date_from
,
date_to
,
cloud_coverage_max
# plot_union_polygon(tile_id, union_polygon, tile_polygon)
def
tile_completeness_check_with_two_acquisitions
(
df
,
tile_id
=
[],
if_printout
=
True
):
df_fmask
=
df
.
loc
[
df
[
'
download
'
].
str
.
contains
(
"
FMask
"
,
case
=
False
,
na
=
False
)]
df_fmask
.
index
=
range
(
0
,
len
(
df_fmask
.
index
))
df_acq_coords
=
df_fmask
[
'
acq_coords
'
]
tile_coords
=
ast
.
literal_eval
(
df_fmask
[
'
tile_coords
'
].
loc
[
0
])
tile_polygon
=
Polygon
(
tile_coords
)
cloud_coverage_max
=
0
completeness_check
=
"
incomplete
"
if_complete
=
False
ids
=
""
for
first_idx
in
range
(
0
,
len
(
df_fmask
)
-
1
):
ids
=
[
df_fmask
[
'
id
'
].
loc
[
first_idx
]]
print
(
"
here:
"
,
ids
)
union_polygon
=
Polygon
([])
if_complete
=
False
cloud_coverage_first
=
df_fmask
[
'
cloud
'
].
loc
[
first_idx
]
for
second_idx
in
range
(
first_idx
,
len
(
df_fmask
)):
cloud_coverage_second
=
df_fmask
[
'
cloud
'
].
loc
[
second_idx
]
acq_coords
=
ast
.
literal_eval
(
df_fmask
[
'
acq_coords
'
].
loc
[
second_idx
])
# coord_tmp = [[float(coord_tmp[i+1]),float(coord_tmp[i])] for i in range(0,int(len(coord_tmp)),2)]
acquisition_polygon
=
Polygon
(
acq_coords
)
union_polygon
=
union_polygon
.
union
(
acquisition_polygon
)
union_polygon
=
tile_polygon
.
intersection
(
union_polygon
)
polygon_surface_relative_diff_percent
=
(
tile_polygon
.
area
-
union_polygon
.
area
)
/
tile_polygon
.
area
*
100
if
polygon_surface_relative_diff_percent
<
1e-1
:
if_complete
=
True
ids
.
append
(
df_fmask
[
'
id
'
].
loc
[
second_idx
])
cloud_coverage_max
=
max
(
cloud_coverage_first
,
cloud_coverage_second
)
print
(
cloud_coverage_max
)
break
if
if_complete
:
print
(
cloud_coverage_max
)
completeness_check
=
"
complete
"
break
print
(
cloud_coverage_max
)
# date_to = max(df_fmask['date'].loc[first_idx], df_fmask['date'].loc[second_idx])
# date_from = min(df_fmask['date'].loc[first_idx], df_fmask['date'].loc[second_idx])
date_from
=
"
2020-01-01
"
date_to
=
"
2024-12-31
"
status_message
=
f
"
{
tile_id
}
, from
{
date_from
}
to
{
date_to
}
, maximum cloud coverage:
{
cloud_coverage_max
}
,
{
completeness_check
}
!
"
if
if_printout
:
print
(
status_message
,
end
=
"
\r
"
)
# print(status_message)
# print(status_message)
return
if_complete
,
ids
,
date_from
,
date_to
,
cloud_coverage_max
return
if_complete
,
ids
,
date_from
,
date_to
,
cloud_coverage_max
# plot_union_polygon(tile_id, union_polygon, tile_polygon)
# plot_union_polygon(tile_id, union_polygon, tile_polygon)
...
@@ -530,7 +575,7 @@ def plot_histogram_of_tiles(time_interval_list, cloud_coverage_max_list, cloud_s
...
@@ -530,7 +575,7 @@ def plot_histogram_of_tiles(time_interval_list, cloud_coverage_max_list, cloud_s
# axs[0].set_title('Histogram of numbr of days')
# axs[0].set_title('Histogram of numbr of days')
axs
[
0
].
hist
(
data
,
density
=
False
,
bins
=
num_bins
,
color
=
'
blue
'
,
edgecolor
=
'
blue
'
)
axs
[
0
].
hist
(
data
,
density
=
False
,
bins
=
num_bins
,
color
=
'
blue
'
,
edgecolor
=
'
blue
'
)
axs
[
0
].
set_xlabel
(
'
Number of days
'
)
axs
[
0
].
set_xlabel
(
'
Acquisition interval (months)
'
)
axs
[
0
].
set_ylabel
(
'
Number of tiles
'
)
axs
[
0
].
set_ylabel
(
'
Number of tiles
'
)
data
=
cloud_coverage_max_list
data
=
cloud_coverage_max_list
...
@@ -539,6 +584,6 @@ def plot_histogram_of_tiles(time_interval_list, cloud_coverage_max_list, cloud_s
...
@@ -539,6 +584,6 @@ def plot_histogram_of_tiles(time_interval_list, cloud_coverage_max_list, cloud_s
axs
[
1
].
hist
(
data
,
bins
=
num_bins
,
color
=
'
blue
'
,
edgecolor
=
'
blue
'
)
axs
[
1
].
hist
(
data
,
bins
=
num_bins
,
color
=
'
blue
'
,
edgecolor
=
'
blue
'
)
axs
[
1
].
set_xlabel
(
'
Cloud coverage (%)
'
)
axs
[
1
].
set_xlabel
(
'
Cloud coverage (%)
'
)
axs
[
1
].
set_ylabel
(
'
Number of tiles
'
)
axs
[
1
].
set_ylabel
(
'
Number of tiles
'
)
fig
.
suptitle
(
f
"
Cloud coverage step size:
{
cloud_step_size
}
(%)
"
,
fontsize
=
16
)
#
fig.suptitle(f"Cloud coverage step size: {cloud_step_size} (%)", fontsize=16)
# plt.show()
# plt.show()
fig
.
savefig
(
image_output_file
,
bbox_inches
=
'
tight
'
)
fig
.
savefig
(
image_output_file
,
bbox_inches
=
'
tight
'
)
This diff is collapsed.
Click to expand it.
check_query_lists.py
+
12
−
5
View file @
26cacaff
...
@@ -5,7 +5,8 @@ import argparse
...
@@ -5,7 +5,8 @@ import argparse
import
os
import
os
from
all_functions
import
plot_histogram_of_tiles
from
all_functions
import
plot_histogram_of_tiles
from
all_functions
import
filter_bands
from
all_functions
import
filter_bands
from
all_functions
import
completeness_check
from
all_functions
import
filter_ids
from
all_functions
import
tile_completeness_check_with_all_acquisitions
as
tile_completeness_check
from
all_functions
import
time_elapsed
from
all_functions
import
time_elapsed
def
analyze_query_list
(
cloud_coverage_step
=
10
):
def
analyze_query_list
(
cloud_coverage_step
=
10
):
...
@@ -24,20 +25,26 @@ def analyze_query_list(cloud_coverage_step = 10):
...
@@ -24,20 +25,26 @@ def analyze_query_list(cloud_coverage_step = 10):
# for tile_id in tile_id_list: print(tile_id)
# for tile_id in tile_id_list: print(tile_id)
df_selected
=
pd
.
DataFrame
(
columns
=
df
.
columns
)
df_selected
=
pd
.
DataFrame
(
columns
=
df
.
columns
)
df
=
df_band_filtered
df
=
df_band_filtered
print
(
f
"
Clodud step size:
{
cloud_coverage_step
}
, number of files:
{
len
(
df
)
}
"
)
time_interval_list
=
[]
time_interval_list
=
[]
cloud_coverage_max_list
=
[]
cloud_coverage_max_list
=
[]
incomplete_tile_list
=
[]
incomplete_tile_list
=
[]
for
tile_id
in
tile_id_list
:
for
tile_id
in
tile_id_list
:
df_tile
=
df
.
loc
[
df
[
'
tile
'
]
==
tile_id
]
df_tile
=
df
.
loc
[
df
[
'
tile
'
]
==
tile_id
]
if_complete
,
ids
,
date_from
,
date_to
,
cloud_coverage_max_current
=
completeness_check
(
df_tile
,
tile_id
,
if_printout
=
False
)
if_complete
,
ids
,
date_from
,
date_to
,
cloud_coverage_max_current
=
tile_
completeness_check
(
df_tile
,
tile_id
,
if_printout
=
False
)
#
df_tile_selected = df_tile[df_tile.apply(lambda row: filter_ids(row, ids), axis=1)]
df_tile_selected
=
df_tile
[
df_tile
.
apply
(
lambda
row
:
filter_ids
(
row
,
ids
),
axis
=
1
)]
#
df_selected = pd.concat([df_selected, df_tile_selected], ignore_index=True)
df_selected
=
pd
.
concat
([
df_selected
,
df_tile_selected
],
ignore_index
=
True
)
if
not
if_complete
:
if
not
if_complete
:
incomplete_tile_list
.
append
(
tile_id
)
incomplete_tile_list
.
append
(
tile_id
)
months_elapsed
,
days_elapsed
=
time_elapsed
(
date_from
,
date_to
)
months_elapsed
,
days_elapsed
=
time_elapsed
(
date_from
,
date_to
)
time_interval_list
.
append
(
months_elapsed
)
time_interval_list
.
append
(
months_elapsed
)
cloud_coverage_max_list
.
append
(
cloud_coverage_max_current
)
cloud_coverage_max_list
.
append
(
cloud_coverage_max_current
)
image_output_file
=
f
"
histogram_cloud-step-size-
{
cloud_coverage_step
}
.pdf
"
image_output_dir
=
os
.
environ
[
"
HOME
"
]
+
"
/git/jugit/3d-abc-slides/hls-download-pipeline/figs
"
image_output_directory
=
"
~/git/jugit/3d-abc-slides/hls-download-pipeline/figs
"
image_output_file
=
f
"
histogram-cloud-step-size-
{
cloud_coverage_step
}
.png
"
if
os
.
path
.
isdir
(
image_output_dir
):
image_output_file
=
image_output_dir
+
"
/
"
+
image_output_file
df_selected
.
to_csv
(
"
final_
"
+
input_file
,
header
=
True
,
index
=
False
)
plot_histogram_of_tiles
(
time_interval_list
,
cloud_coverage_max_list
,
cloud_coverage_step
,
image_output_file
)
plot_histogram_of_tiles
(
time_interval_list
,
cloud_coverage_max_list
,
cloud_coverage_step
,
image_output_file
)
if
len
(
incomplete_tile_list
)
>
0
:
if
len
(
incomplete_tile_list
)
>
0
:
df_incomplete
=
pd
.
DataFrame
(
incomplete_tile_list
,
columns
=
[
'
tile
'
])
df_incomplete
=
pd
.
DataFrame
(
incomplete_tile_list
,
columns
=
[
'
tile
'
])
...
...
This diff is collapsed.
Click to expand it.
download_hls.py
→
download_hls.py
.old
+
0
−
0
View file @
26cacaff
File moved
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment