接着爬取图片讨论:
将爬取的图片进行转移
Dockerfile
# Base image: the slim variant of the official Python 3.9 image.
FROM python:3.9-slim
# All following instructions run relative to /app inside the image.
WORKDIR /app
# Copy only the dependency list first so the pip install layer below can be
# reused from the build cache when just the application code changes.
COPY requirements.txt .
# Install the dependencies; --no-cache-dir disables pip's cache for this install.
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the build context into /app.
COPY . .
# Run app.py as the container's main process.
CMD ["python", "app.py"]
在README.md顶部YAML配置块(两行 --- 之间)的末尾加入
app_port: 8080
app.py
import os
from flask import Flask
from webdav3.client import Client
import paramiko
import logging
from requests.auth import HTTPBasicAuth
# Configure logging: the root logger emits INFO-level messages and above.
logging.basicConfig(level=logging.INFO)

# Connection settings are read from environment variables; a variable that is
# not set yields None.

# WebDAV source.
WEBDAV_URL = os.getenv('WEBDAV_URL')
WEBDAV_USERNAME = os.getenv('WEBDAV_USERNAME')
WEBDAV_PASSWORD = os.getenv('WEBDAV_PASSWORD')
WEBDAV_PATH = os.getenv('WEBDAV_PATH')

# SSH/SFTP destination.
SSH_HOST = os.getenv('SSH_HOST')
# The port defaults to the standard SSH port 22. It can be overridden with the
# optional SSH_PORT variable; an unset or empty value falls back to 22, and a
# non-numeric value raises ValueError at import time.
SSH_PORT = int(os.getenv('SSH_PORT') or 22)
SSH_USERNAME = os.getenv('SSH_USERNAME')
SSH_PASSWORD = os.getenv('SSH_PASSWORD')
SSH_PATH = os.getenv('SSH_PATH')
# Flask application object; the route handler below is registered on it.
app = Flask(__name__)
def safe_download(webdav_client, remote_path, local_path):
    """Download ``remote_path`` from WebDAV into ``local_path``.

    The regular ``webdav_client.download`` call is tried first. If it fails
    with ``KeyError('content-length')``, the file is fetched again with a
    plain streamed GET on the client's session, authenticated with HTTP basic
    auth built from the module-level WebDAV credentials.

    Args:
        webdav_client: Client object providing ``download``, ``get_url`` and
            ``session``.
        remote_path: Path of the file on the WebDAV server.
        local_path: Local filesystem path the content is written to.

    Returns:
        None. When the fallback request does not answer with HTTP 200 the
        failure is logged and the fallback writes nothing, so callers should
        check the local file before using it.

    Raises:
        KeyError: Any ``KeyError`` from ``download`` other than the missing
            ``content-length`` key is re-raised unchanged.
    """
    try:
        webdav_client.download(remote_path, local_path)
        return
    except KeyError as e:
        # Only the missing 'content-length' key is recoverable here.
        if str(e) != "'content-length'":
            raise

    logging.warning(f"'content-length' missing for {remote_path}. Using alternative download method.")
    # Authenticate the fallback request with basic auth.
    auth = HTTPBasicAuth(WEBDAV_USERNAME, WEBDAV_PASSWORD)
    url = webdav_client.get_url(remote_path)
    # A streamed response keeps its connection checked out until it is closed,
    # so the context manager releases it on every path, including the early
    # return below and any exception raised while writing.
    with webdav_client.session.get(url, stream=True, auth=auth) as response:
        if response.status_code != 200:
            logging.error(f"Failed to download {remote_path}. Status code: {response.status_code}")
            return
        with open(local_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    logging.info(f"Downloaded file size: {os.path.getsize(local_path)} bytes")
@app.route('/')
def transfer_images():
    """Copy every ``.jpg`` listed under WEBDAV_PATH to SSH_PATH over SFTP.

    A file that already exists on the SSH server (checked with ``sftp.stat``)
    is skipped. Every other file is downloaded to a temporary file under
    ``/tmp``, uploaded with ``sftp.put``, and the temporary file is removed
    afterwards. The SFTP and SSH connections are closed even when a step
    raises.

    Returns:
        The string ``"Transfer complete!"`` once all listed files have been
        processed.
    """
    import posixpath  # remote SFTP paths use '/' regardless of the local OS

    options = {
        'webdav_hostname': WEBDAV_URL,
        'webdav_login': WEBDAV_USERNAME,
        'webdav_password': WEBDAV_PASSWORD
    }
    webdav_client = Client(options)

    # Create the SSH client.
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; consider loading known host keys instead.
    ssh = paramiko.SSHClient()
    try:
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(SSH_HOST, port=SSH_PORT, username=SSH_USERNAME, password=SSH_PASSWORD)
        sftp = ssh.open_sftp()
        try:
            # Fetch the file listing from WebDAV and transfer the images.
            for file in webdav_client.list(WEBDAV_PATH):
                if not file.endswith('.jpg'):
                    continue
                file_name = os.path.basename(file)
                remote_path = posixpath.join(SSH_PATH, file_name)
                logging.info(f"Checking if file {file_name} exists on remote server...")
                try:
                    sftp.stat(remote_path)
                except FileNotFoundError:
                    pass  # not on the server yet: transfer it below
                else:
                    logging.info(f"File {file_name} already exists. Skipping...")
                    continue

                local_tmp_file = os.path.join('/tmp', file_name)
                try:
                    safe_download(webdav_client, f"{WEBDAV_PATH}/{file_name}", local_tmp_file)
                    # Upload only when the download produced a non-empty file.
                    if os.path.exists(local_tmp_file) and os.path.getsize(local_tmp_file) > 0:
                        sftp.put(local_tmp_file, remote_path)
                        logging.info(f"File {file_name} transferred with size: {os.path.getsize(local_tmp_file)} bytes")
                    else:
                        logging.warning(f"Download of {file_name} failed or produced an empty file. Skipping upload.")
                finally:
                    # Remove the temporary copy so /tmp does not fill up.
                    try:
                        os.remove(local_tmp_file)
                    except FileNotFoundError:
                        pass
        finally:
            sftp.close()
    finally:
        ssh.close()
    return "Transfer complete!"
if __name__ == "__main__":
    # Listen on all interfaces; 8080 is the same port given as app_port in
    # the README setting.
    app.run(port=8080, host='0.0.0.0')
requirements.txt
requests
paramiko
webdavclient3
flask
Hugging Face Space后台(Settings)中需要配置的环境变量
WebDAV相关变量
WEBDAV_URL:WebDAV的URL。
WEBDAV_USERNAME:WebDAV的用户名。
WEBDAV_PASSWORD:WebDAV的密码。
WEBDAV_PATH:需要获取文件的WebDAV路径。
SSH相关变量
SSH_HOST:SSH服务器的IP地址或主机名。
SSH_USERNAME:SSH的用户名。
SSH_PASSWORD:SSH的密码。
SSH_PATH:文件需要上传到的SSH服务器路径。