-
Notifications
You must be signed in to change notification settings - Fork 282
Expand file tree
/
Copy pathdownload.py
More file actions
58 lines (47 loc) · 1.7 KB
/
download.py
File metadata and controls
58 lines (47 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# download.py
import os
import sys
import urllib3
from urllib.parse import urlparse
import pandas as pd
import itertools
import shutil
from urllib3.util import Retry
# Suppress urllib3's InsecureRequestWarning for the whole process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Class labels; combined with set_types to build the output directory tree.
classes = ["cat", "fish"]
# Dataset splits; the script entry point creates ./<set_type>/<class> for
# every (set_type, class) pair.
set_types = ["train", "test", "val"]
def download_image(url, klass, data_type):
    """Download ``url`` to ``<data_type>/<klass>/<basename of the URL path>``.

    The download is skipped when the destination file already exists.
    Failures are best-effort: an error line is printed and the function
    returns normally, so one bad URL never aborts a batch.

    The body is streamed to a ``.part`` file and only renamed to the final
    name after a complete, successful (HTTP 200) transfer. This guarantees
    that a failed or interrupted download never leaves an empty or truncated
    file behind, which the "already exists" check would otherwise treat as
    done on the next run.

    Args:
        url: Address of the image to fetch.
        klass: Class label; used as the second path component.
        data_type: Dataset split; used as the first path component.

    Returns:
        None.
    """
    basename = os.path.basename(urlparse(url).path)
    filename = "{}/{}/{}".format(data_type, klass, basename)
    if os.path.exists(filename):
        return

    partial_name = filename + ".part"
    try:
        http = urllib3.PoolManager(retries=Retry(connect=1, read=1, redirect=2))
        # preload_content=False streams the body instead of buffering it in
        # memory; leaving the ``with`` block closes the response.
        with http.request("GET", url, preload_content=False) as resp:
            # Check the status BEFORE creating any file on disk.
            if resp.status != 200:
                print("Error downloading {}".format(url))
                return
            with open(partial_name, "wb") as out_file:
                shutil.copyfileobj(resp, out_file)
        # Publish the file only once it has been written completely.
        os.replace(partial_name, filename)
    except Exception:
        # ``Exception`` (not a bare ``except``) keeps the best-effort
        # behaviour while letting KeyboardInterrupt/SystemExit propagate.
        print("Error downloading {}".format(url))
        try:
            os.remove(partial_name)
        except OSError:
            # Nothing to clean up (or it cannot be removed); stay best-effort.
            pass
def main():
    """Create the dataset directory tree and download every image in images.csv.

    Reads ``images.csv`` from the current working directory and uses its
    ``url``, ``class`` and ``type`` columns. A ``./<set_type>/<class>``
    directory is created for every combination of ``set_types`` and
    ``classes`` before downloading starts.

    Returns:
        Process exit status: 0 on completion, 1 when ``images.csv`` is
        missing (so shells and CI can detect the failure).
    """
    if not os.path.exists("images.csv"):
        print("Error: can't find images.csv!")
        return 1

    images_df = pd.read_csv("images.csv")

    # Create the output directories up front.
    for set_type, klass in itertools.product(set_types, classes):
        path = "./{}/{}".format(set_type, klass)
        if not os.path.exists(path):
            print("Creating directory {}".format(path))
            os.makedirs(path)

    print("Downloading {} images".format(len(images_df)))
    # Plain loop: the downloads are side effects, there is no result to keep.
    rows = zip(images_df["url"], images_df["class"], images_df["type"])
    for url, klass, data_type in rows:
        download_image(url, klass, data_type)
    return 0


if __name__ == "__main__":
    sys.exit(main())