
Hi community!
- In the first part of this Blog series, we have learned how to use the Rclone tool to download data from Amazon, first, using Rclone commands and then, using Python scripts.
- In the second part, we have seen how to download only GOES-R imagery from minutes 20 and 50 every hour (to complement the data available on GNC-A) and suggested other download schemes.
In this part, we'll post two example scripts: one to download historical data, and the other to download the most recent data available.
==== DOWNLOAD HISTORICAL DATA ====
In the script below you may change:
- Year(s) of interest: Line 27
- Julian day(s) (three digits!): Line 28 (you may check the julian days at this link)
- ABI Channel(s): Line 29 (C01 to C16)
- ABI Product(s) to Download: Line 25
- Minimum and Maximum hours to Download: Lines 30 and 31
The ABI Products (Line 25) may be:
- L1b-RadC: Level 1b Radiances (CONUS)
- L1b-RadF: Level 1b Radiances (Full-Disk)
- L1b-RadM: Level 1b Radiances (Mesoscale)
- L2-CMIPC: Level 2 CMI (CONUS)
- L2-CMIPF: Level 2 CMI (Full-Disk)
- L2-CMIPM: Level 2 CMI (Mesoscale)
- L2-MCMIPC: Level 2 CMI (CONUS) – All 16 bands [2 km] in a single NetCDF file.
- L2-MCMIPF: Level 2 CMI (Full-Disk) – All 16 bands [2 km] in a single NetCDF file.
- L2-MCMIPM: Level 2 CMI (Mesoscale) – All 16 bands [2 km] in a single NetCDF file.
############################################################
# LICENSE
# Copyright (C) 2019 - INPE - NATIONAL INSTITUTE FOR SPACE RESEARCH
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
############################################################
# Required Modules
import os          # Miscellaneous operating system interfaces
import subprocess  # Spawn new processes and capture their output
import datetime    # Basic date and time types
import sys         # System-specific parameters and functions
import platform    # Access to underlying platform's identifying data
import re          # Regular expression operations

# On Windows the rclone binary is "rclone.exe"; elsewhere it is just "rclone".
# BUG FIX: the original set `extension` only on Windows, which raised a
# NameError on Linux / macOS when the variable was used below.
extension = '.exe' if platform.system() == "Windows" else ''

# Welcome message
print ("GOES-R Big Data Python / Rclone Downloader: Historical Data")

# Desired Data
BUCKET = 'noaa-goes16'    # For GOES-R the buckets are: ['noaa-goes16', 'noaa-goes17']
PRODUCT = 'ABI-L2-CMIPF'  # Choose from ['ABI-L1b-RadC', 'ABI-L1b-RadF', 'ABI-L1b-RadM', 'ABI-L2-CMIPC', 'ABI-L2-CMIPF', 'ABI-L2-CMIPM', 'ABI-L2-MCMIPC', 'ABI-L2-MCMIPF', 'ABI-L2-MCMIPM']
YEAR = ['2019']
JULIAN_DAY = ['224', '230', '231']      # Julian days of interest (three digits!)
CHANNEL = ['C01', 'C02', 'C03', 'C13']  # Choose from ['C01', 'C02', 'C03', 'C04', 'C05', 'C06', 'C07', 'C08', 'C09', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16']
HOUR_MIN = '06'           # Minimum hour to download
HOUR_MAX = '14'           # Maximum hour to download
OUTDIR = "C:\\Rclone\\"   # Choose the output directory

# Daily log file, used to avoid downloading the same file twice
LOG_NAME = 'goes16_aws_log_' + str(datetime.datetime.now())[0:10] + '.txt'

for year in YEAR:
    for julian_day in JULIAN_DAY:
        for channel in CHANNEL:
            # Get the hours available for that given day
            HOUR = subprocess.check_output(
                'rclone' + extension + " " + 'lsd publicAWS:' + BUCKET + "/" + PRODUCT + "/" + year + "/" + julian_day + "/",
                shell=True)
            # Change type from 'bytes' to 'string'
            HOUR = HOUR.decode()
            # Split hours based on the new line and remove the empty item at the end.
            HOUR = HOUR.split('\n')
            HOUR.remove('')
            # Keep only the directory name (the hour) from each listing line
            HOUR = [i.split(" ")[-1] for i in HOUR]
            # BUG FIX: the comparison operators on this line were lost when the
            # script was published (HTML tag-stripping ate everything between
            # '<' and '>'). Keep only the hours inside [HOUR_MIN, HOUR_MAX].
            HOUR = [elem for elem in HOUR if int(HOUR_MIN) <= int(elem) <= int(HOUR_MAX)]
            for hour in HOUR:
                # List the files available for this hour
                files = subprocess.check_output(
                    'rclone' + extension + " " + 'ls publicAWS:' + BUCKET + "/" + PRODUCT + "/" + year + "/" + julian_day + "/" + hour + "/",
                    shell=True)
                # Change type from 'bytes' to 'string'
                files = files.decode()
                # Split files based on the new line and remove the empty item at the end.
                files = files.split('\n')
                files.remove('')
                # Get only the file names for a specific channel
                files = [x for x in files if channel in x]
                # Get only the file names, without the file sizes
                files = [i.split(" ")[-1] for i in files]
                # Print the file names list
                print ("File list for this particular time, date and channel:")
                print(year)
                print(julian_day)
                print(channel)
                print(hour)
                if not files:
                    print("No files available yet... Exiting loop")
                    break  # No new files available in the cloud yet. Exiting the loop.
                for i in files:
                    print(i)
                    print ("Checking if the file is on the log...")
                    # If the log file doesn't exist yet, create one
                    file = open(LOG_NAME, 'a')
                    file.close()
                    # Put all file names on the log in a list
                    with open(LOG_NAME) as f:
                        log = f.readlines()
                    # Remove the line feeds
                    log = [x.strip() for x in log]
                    if i not in log:
                        print ("Not on the log! Downloading the file for channel: ", channel)
                        # Download the file for this particular hour
                        os.system('rclone' + extension + " " + 'copy publicAWS:' + BUCKET + "/" + PRODUCT + "/" + year + "/" + julian_day + "/" + hour + "/" + i + " " + OUTDIR)
                        print ("Download finished!")
                        print ("Putting the file name on the log...")
                        # Put the processed file on the log (timestamp + file name)
                        # NOTE: the redundant `import datetime` that was here has
                        # been removed — the module is already imported above.
                        with open(LOG_NAME, 'a') as log_file:
                            log_file.write(str(datetime.datetime.now()))
                            log_file.write('\n')
                            log_file.write(i + '\n')
                            log_file.write('\n')
                    else:
                        print("This file was already downloaded.")
==== DOWNLOAD THE CURRENT DATA ====
In the script below you may change:
- ABI Channel(s): Line 29 (C01 to C16)
- ABI Product(s) to Download: Line 25
The ABI Products (Line 25) may be:
- L1b-RadC: Level 1b Radiances (CONUS)
- L1b-RadF: Level 1b Radiances (Full-Disk)
- L1b-RadM: Level 1b Radiances (Mesoscale)
- L2-CMIPC: Level 2 CMI (CONUS)
- L2-CMIPF: Level 2 CMI (Full-Disk)
- L2-CMIPM: Level 2 CMI (Mesoscale)
- L2-MCMIPC: Level 2 CMI (CONUS) – All 16 bands [2 km] in a single NetCDF file.
- L2-MCMIPF: Level 2 CMI (Full-Disk) – All 16 bands [2 km] in a single NetCDF file.
- L2-MCMIPM: Level 2 CMI (Mesoscale) – All 16 bands [2 km] in a single NetCDF file.
############################################################
# LICENSE
# Copyright (C) 2019 - INPE - NATIONAL INSTITUTE FOR SPACE RESEARCH
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
############################################################
# Required Modules
import os          # Miscellaneous operating system interfaces
import subprocess  # Spawn new processes and capture their output
import datetime    # Basic date and time types
import platform    # Access to underlying platform's identifying data

# On Windows the rclone binary is "rclone.exe"; elsewhere it is just "rclone".
# BUG FIX: the original set `extension` only on Windows, which raised a
# NameError on Linux / macOS when the variable was used below.
extension = '.exe' if platform.system() == "Windows" else ''

print ("GOES-R Big Data Python / Rclone Downloader: Current Data")

# Desired Data
BUCKET = 'noaa-goes16'    # For GOES-R the buckets are: ['noaa-goes16', 'noaa-goes17']
PRODUCT = 'ABI-L2-CMIPF'  # Choose from ['ABI-L1b-RadC', 'ABI-L1b-RadF', 'ABI-L1b-RadM', 'ABI-L2-CMIPC', 'ABI-L2-CMIPF', 'ABI-L2-CMIPM', 'ABI-L2-MCMIPC', 'ABI-L2-MCMIPF', 'ABI-L2-MCMIPM']
UTC_DIFF = +3  # How many hours UTC is ahead (+) or behind you (-)

# BUG FIX: the original added UTC_DIFF to the local *hour* only, which could
# produce a value outside 00-23 near midnight while YEAR / JULIAN_DAY were
# still computed from the uncorrected local time. Shifting the whole
# timestamp keeps year, julian day and hour mutually consistent.
utc_now = datetime.datetime.now() + datetime.timedelta(hours=UTC_DIFF)
YEAR = str(utc_now.year)
# BUG FIX: the AWS bucket paths use three-digit julian days ("095", not "95"),
# so the day number must be zero-padded.
JULIAN_DAY = str(utc_now.timetuple().tm_yday).zfill(3)
HOUR = str(utc_now.hour).zfill(2)

print ("Current year, julian day and hour based on your local machine:")
print("YEAR: ", YEAR)
print("JULIAN DAY: ", JULIAN_DAY)
print("HOUR (UTC): ", HOUR)

CHANNEL = ['C01', 'C02', 'C03', 'C13']  # Choose from ['C01', 'C02', 'C03', 'C04', 'C05', 'C06', 'C07', 'C08', 'C09', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16']
OUTDIR = "C:\\Rclone\\"   # Choose the output directory

# Daily log file, used to avoid downloading the same file twice
LOG_NAME = 'goes16_aws_log_' + str(datetime.datetime.now())[0:10] + '.txt'

for channel in CHANNEL:
    # Get output from rclone command, based on the desired data
    files = subprocess.check_output(
        'rclone' + extension + " " + 'ls publicAWS:' + BUCKET + "/" + PRODUCT + "/" + YEAR + "/" + JULIAN_DAY + "/" + HOUR + "/",
        shell=True)
    # Change type from 'bytes' to 'string'
    files = files.decode()
    # Split files based on the new line and remove the empty item at the end.
    files = files.split('\n')
    files.remove('')
    # Get only the file names for a specific channel
    files = [x for x in files if channel in x]
    # Get only the file names, without the file sizes
    files = [i.split(" ")[-1] for i in files]
    if not files:
        print("No files available yet... Exiting script")
        break  # No new files available in the cloud yet. Exiting the loop.
    print ("Checking if the file is on the daily log...")
    # If the log file doesn't exist yet, create one
    file = open(LOG_NAME, 'a')
    file.close()
    # Put all file names on the log in a list
    with open(LOG_NAME) as f:
        log = f.readlines()
    # Remove the line feeds
    log = [x.strip() for x in log]
    if files[-1] not in log:
        print ("Downloading the most recent file for channel: ", channel)
        # Download the most recent file for this particular hour
        print(files[-1])
        os.system('rclone' + extension + " " + 'copy publicAWS:' + BUCKET + "/" + PRODUCT + "/" + YEAR + "/" + JULIAN_DAY + "/" + HOUR + "/" + files[-1] + " " + OUTDIR)
        print ("Download finished!")
        print ("Putting the file name on the daily log...")
        # Put the processed file on the log (timestamp + file name)
        with open(LOG_NAME, 'a') as log_file:
            log_file.write(str(datetime.datetime.now()))
            log_file.write('\n')
            log_file.write(files[-1] + '\n')
            log_file.write('\n')
    else:
        print("This file was already downloaded.")