88 lines
2.5 KiB
Python
88 lines
2.5 KiB
Python
"""Implementation of Apache VFS schemes and URLs."""
|
|
|
|
import sys
|
|
import re
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
# URI schemes understood by this module, each mapped to the suffix of the
# GDAL VSI handler that serves it.
# TODO: extend for other cloud platforms.
SCHEMES = dict(
    ftp='curl',
    gzip='gzip',
    http='curl',
    https='curl',
    s3='s3',
    tar='tar',
    zip='zip',
    gs='gs',
)

# Schemes that are served by the 'curl' handler.
CURLSCHEMES = {name for name in SCHEMES if SCHEMES[name] == 'curl'}

# Schemes that are treated as remote (see `is_remote`).
# TODO: extend for other cloud platforms.
REMOTESCHEMES = {
    name for name in SCHEMES if SCHEMES[name] in ('curl', 's3', 'gs')
}
|
|
|
|
|
|
def valid_vsi(vsi):
    """Return True when every '+'-separated part of `vsi` is a key of SCHEMES."""
    for part in vsi.split('+'):
        if part not in SCHEMES:
            return False
    return True
|
|
|
|
|
|
def is_remote(scheme):
    """Tell whether any '+'-separated part of `scheme` is in REMOTESCHEMES.

    A `scheme` of None is never remote.
    """
    if scheme is not None:
        for part in scheme.split('+'):
            if part in REMOTESCHEMES:
                return True
    return False
|
|
|
|
|
|
def vsi_path(path, vsi=None, archive=None):
    """Convert a parsed path to an OGR VSI path (see cpl_vsi.h).

    Parameters:
        path: the dataset path, or the member path inside `archive`.
        vsi: optional scheme name(s) from SCHEMES, joined by '+' when
            several handlers are chained (e.g. 'zip+https').
        archive: optional path of the archive that contains `path`.

    Returns: str
        `path` unchanged when `vsi` is empty or None, otherwise a path
        prefixed with one '/vsi<suffix>' segment per scheme in `vsi`.

    Raises: KeyError
        When a part of `vsi` is not a key of SCHEMES.
    """
    if not vsi:
        return path

    prefix = '/'.join(f'vsi{SCHEMES[p]}' for p in vsi.split('+'))
    if not archive:
        return f'/{prefix}/{path}'

    # The member path must be separated from the archive name by a slash.
    # Add one when the caller supplied a member without a leading '/', so
    # that 'a.zip' + 'layer.shp' does not collapse into 'a.ziplayer.shp'.
    sep = '' if not path or str(path).startswith('/') else '/'
    return f'/{prefix}/{archive}{sep}{path}'
|
|
|
|
|
|
def parse_paths(uri, vfs=None):
    """Parse a URI or Apache VFS URL into its parts

    Parameters:
        uri: a dataset path or URI. Within a URI, '!' separates an
            archive from the member path inside it.
        vfs: optional URL of an archive (e.g. 'zip:///data/a.zip'). When
            given, `uri` is returned untouched as the path and the scheme
            and archive are taken from `vfs` instead.

    Returns: tuple
        (path, scheme, archive)

        `scheme` is None when no scheme was found; `archive` is None when
        no archive was identified.
    """
    # Windows drive letters (e.g. "C:") confuse `urlparse` as they look like
    # URL schemes
    if sys.platform == "win32" and re.match(r"^[a-zA-Z]:", uri):
        return uri, None, None

    path = uri
    archive = None

    if vfs:
        parts = urlparse(vfs)
        scheme = parts.scheme
        archive = parts.path
        if parts.netloc and parts.netloc != 'localhost':
            archive = parts.netloc + archive
    else:
        parts = urlparse(uri)
        scheme = parts.scheme
        path = parts.path
        # `urlparse` moves everything after the first '?' into `query`.
        # Put it back so signed URLs keep their parameters and plain file
        # names containing '?' are not truncated.
        if parts.query:
            path = f'{path}?{parts.query}'
        if parts.netloc and parts.netloc != 'localhost':
            innermost = scheme.split('+')[-1]
            if innermost in CURLSCHEMES:
                # We need to deal with cases such as
                # zip+https://server.com/data.zip: the curl handler needs
                # the complete URL, including its own scheme.
                path = f'{innermost}://{parts.netloc}{path}'
            else:
                path = parts.netloc + path
        # Split off the archive for compound schemes ('zip+https') as well
        # as single ones; an empty scheme never validates.
        if valid_vsi(scheme):
            pieces = path.split('!')
            path = pieces.pop()
            archive = pieces.pop() if pieces else None

    return path, scheme or None, archive
|