# -*- coding: utf-8 -*-
#
# file-related models.
#
# ------------------------------------------------
# imports
# -------
import base64
import datetime
import json
import os
from gems import cached
import six
from six.moves.urllib.parse import urlparse
import esp.base as base
from .__base__ import LinkedModel, BaseModel
# models
# ------
class File(LinkedModel):
    """
    Object for interacting with files from the ESP database.

    See the `Usage <./usage.html>`_ and `Examples <./examples.html>`_ pages
    of the documentation for more context and comprehensive examples of
    how to create and use this type of objects.

    Configuration:

        Create file:

        .. code-block:: yaml

            name: My Protocol SOP
            desc: An SOP file
            tags: [sop, protocol]
            uri: relative/path/to/instructions.pdf
            upload: true
            type: PDF

        Create file object with reference to existing file:

        .. code-block:: yaml

            name: Large Sequencing File
            desc: An SOP file
            tags: [sop, protocol]
            uri: file:///path/to/local/file/instructions.bam
            upload: true
            type: raw

        Create file object and register as task output:

        .. code-block:: yaml

            name: SampleSheet.csv
            desc: Illumina Run Sheet
            task_file: Illumina Runsheet
            uri: /path/to/SampleSheet.csv
            upload: false
            type: csv

    Configuration Notes:

        * Due to current backend limitations, `uri` inputs can only take the
          formats: "relative/path", "/absolute/path", and
          "file:///absolute/path".

        * If `upload` is true, uri must resolve to an existing file on the
          local file system, otherwise an error will be raised. If upload is
          False, file will only be registered. Upload defaults to false.

        * The `task_file` parameter is primarily useful when registering
          pipeline output files (since you can pass in the task instance
          UUID).

        * "type" is used internally by ESP and is not necessarily a mime
          type. For instance, the mime type of a bam file is generally
          application/gzip or application/octet-stream, but the type in ESP
          might normally be "bam".

        * mime-type is an acceptable key for specifying the mime-type. If no
          mime-type is provided, the client will attempt to guess the type
          and pass that type along as the content type of the uploaded file.

    Examples:

        .. code-block:: python

            >>> from esp.models import File
            >>> fi = File('My Protocol SOP')
            >>> fi.name, fi.created_at
            ('My Protocol SOP', '2019-06-21T16:04:01.199076Z')

            >>> # show relationships
            >>> fi.path
            '/path/to/Lab7_ESP/Data/files/0000/instructions.pdf.6a523a74-6703-4474-a24e-5a463b9d9770'
            >>> fi.contents
            '... raw file contents ...'
            >>> fi.download('local-copy.pdf')

    Arguments:
        ident (str): Name or uuid for object.
    """

    __api__ = "files"
    __api_cls__ = "Lab7File"
    __allow_update__ = False

    # NOTE: PUSHING META ON FILE OBJECTS NOT SUPPORTED BY BACKEND
    __mutable__ = [x for x in BaseModel.__mutable__ if x != "meta"]

    __exportable__ = BaseModel.__base_exportable__ + [
        "uri",
        "upload",
        "type",
        "mime-type",
    ]

    # Per-key formatters applied to a File instance on export.
    __export_format__ = {
        "uri": lambda x: x.url,
        "upload": lambda x: True,
        "mime-type": lambda x: x.meta.get("mime-type"),
    }

    @classmethod
    def parse_import(cls, config, overwrite=False, allow_snapshot_uuid_remap=False):
        """
        Create new object in ESP database using config file or other data.

        The ``uri``, ``task_file``, ``upload`` and mime-type entries are
        consumed (popped) from ``config``; the remaining entries are sent to
        the ``/api/files`` endpoint either as a multipart upload
        (``upload`` truthy) or as a JSON registration request.

        Args:
            config (dict): Information to use in creating the new object.
                This method treats it as a dict and modifies it in place.
            overwrite (bool): Whether or not to delete current entry in
                the ESP database. Not used directly by this method.
            allow_snapshot_uuid_remap (bool): Not used directly by this
                method.

        Returns:
            File: Object built from the server response.

        Raises:
            ValueError: If no uri is supplied, if a ``file://`` uri carries
                a network location, if a task file is registered without a
                task uuid being available, or if an upload is requested for
                a non-local or missing file.
        """
        # Function-scope import, matching how this module already imported it.
        import mimetypes

        # process uri
        uri = config.pop("uri", None)
        if uri is None:
            # Without this guard os.path.expandvars(None) fails with an
            # opaque TypeError.
            raise ValueError(
                "File config requires a 'uri' entry. "
                "Use: relative/path, /absolute/path, or file:///absolute/path."
            )
        parsed_uri = urlparse(os.path.expandvars(uri), scheme="file")
        if parsed_uri.scheme == "file" and parsed_uri.netloc:
            raise ValueError(
                "File uri specified, but not a valid network location. "
                "Use: relative/path, /absolute/path, or file:///absolute/path. "
                "You used: {}".format(uri)
            )
        config["url"] = parsed_uri.geturl()
        path = parsed_uri.path
        config.setdefault("meta", {})

        # add taskfile dependencies
        task_file = config.pop("task_file", None)
        if task_file is not None:
            # add taskfile dep to existing list
            deps = config.get("deps", [])
            has_task_dep = any(
                len(x) == 2 and x[1] == "task_file" for x in deps
            )
            if not has_task_dep:
                task_uuid = os.environ.get("LAB7_TASK_UUID", None)
                if not task_uuid:
                    raise ValueError(
                        "Cannot register a task file while no task is running "
                        "unless you explicitly supply the task instance uuid."
                    )
                deps.append([task_uuid, "task_file"])

            # update tags and meta
            tags, meta = config.get("tags", []), config.get("meta", {})
            if "task_file" not in tags:
                tags.append("task_file")
            meta.update({"taskfile_name": task_file})
            config.update({"meta": meta, "deps": deps, "tags": tags})

        # upload file
        upload = config.pop("upload", False)
        if upload:
            if parsed_uri.scheme not in ("", "file"):
                raise ValueError(
                    "To upload a file, the defined URL needs be a local file location "
                    f'("file://..."). Your location is: {path}'
                )

            # check for existing file
            if not os.path.exists(path):
                raise ValueError(f"Cannot upload missing file: {path}")

            # NOTE(review): this branch reads meta["mime-type"] (hyphen) while
            # the register branch below writes meta["mime_type"] (underscore).
            # Kept as-is; confirm which key the backend expects.
            mimetype = config.pop("mime-type", config["meta"].get("mime-type"))
            if not mimetype:
                # attempt to guess the mime type.
                mimetype = mimetypes.guess_type(path)[0]
            if not mimetype:
                # fallback.
                # TODO: Worth introducing a fallback on magic-string-based
                #       approaches?
                mimetype = "application/octet-stream"

            # read file and do upload
            with open(path, "rb") as fi:
                files = {"file": (os.path.basename(path), fi, mimetype)}
                config["mode"] = "upload"
                # tags/meta travel as form fields, so serialize them.
                config["tags"] = json.dumps(config.get("tags", []))
                config["meta"] = json.dumps(config.get("meta", {}))

                # Temporarily drop the session-level Content-Type header for
                # the multipart request, and always restore it -- even if
                # the POST raises -- so the shared session is not left
                # without it.
                headers = base.SESSION.session.headers
                ctype = headers.pop("Content-Type", None)
                try:
                    result = base.SESSION.post(
                        "/api/files", files=files, data=config
                    )
                finally:
                    if ctype is not None:
                        headers["Content-Type"] = ctype
            return File.from_data(result.json())

        # todo: There is a freakish case where we can have a scheme other than file
        # in the URL and it still points to a valid file, e. g.
        # http://something.org/etc/passwd
        # We probably don't care about that at this point, but something to keep in mind.

        # make sure modified_time and created_time are present.
        # note that these are the times of the OS _file_, not the ESP
        # File.
        try:
            stats = os.stat(path)
        except FileNotFoundError:
            ctime = datetime.datetime.now()
            # note: no proper cross-platform-compatible way to pick up
            # ctime since on posix-compliant FSs, ctime = mtime. But
            # reasonable default behavior that callers can override by
            # supplying meta.
            mtime = ctime
            size = 0
            mimetype = "application/octet-stream"
        else:
            ctime = datetime.datetime.fromtimestamp(stats.st_ctime)
            mtime = datetime.datetime.fromtimestamp(stats.st_mtime)
            size = stats.st_size
            mimetype = mimetypes.guess_type(path)[0] or "application/octet-stream"

        # An explicitly configured mime type wins over the guessed one.
        if "mime-type" in config:
            mimetype = config.pop("mime-type")
        elif "mime_type" in config:
            mimetype = config.pop("mime_type")

        # Only fill in values the caller did not already supply in meta.
        meta = config["meta"]
        meta.setdefault("modified_time", mtime.isoformat())
        meta.setdefault("created_time", ctime.isoformat())
        meta.setdefault("file_size", size)
        meta.setdefault("mime_type", mimetype)
        meta.setdefault("original_filename", os.path.basename(path))

        # do POST and return object
        config["mode"] = "register"
        result = base.SESSION.post("/api/files", json=config)
        return File.from_data(result.json())

    @cached
    def contents(self):
        """
        Return raw file contents (as string).

        The value is the ``data`` entry of the JSON response from the
        ``/api/files/<uuid>/contents`` endpoint.
        """
        res = base.SESSION.get("/api/files/{}/contents".format(self.uuid))
        return res.json().get("data")

    def download(self, outfile, encoding="UTF-8"):
        """
        Download file and store as output filename.

        Args:
            outfile (str|os.PathLike|file): Path to downloaded file or file
                object to write to.
            encoding (str): If outfile is a file object in string mode, the
                string encoding to use to decode the binary data from the
                server.

        Note:
            Fetches the contents in a single go. For large text files where
            only a subset may be needed, consider using the 'contents'
            endpoint instead.

            For large, streamable binary files, such as BAM files, esp REST
            APIS have support for ranged data retrieval that is not yet
            supported by the python client.
        """
        import io

        contents = base.SESSION.get("/api/files/{}/download".format(self.uuid)).content

        # Bytes with a path: open binary and write.
        if isinstance(outfile, (six.string_types, os.PathLike)):
            with open(outfile, "wb") as file_:
                file_.write(contents)
            return

        # File object: decide between binary and text. Objects such as
        # io.BytesIO / io.StringIO have no ``mode`` attribute, so fall back
        # to a type check instead of failing with AttributeError.
        mode = getattr(outfile, "mode", None)
        if isinstance(mode, six.string_types):
            binary = "b" in mode
        else:
            binary = not isinstance(outfile, io.TextIOBase)

        if binary:
            # byte mode file object: write it directly.
            outfile.write(contents)
        else:
            # string-mode file object: decode to string using provided encoding.
            outfile.write(contents.decode(encoding))

    @cached
    def linkable_uri(self):
        """
        Return a "linkable" URL.

        The returned URL will be relative to the API_SERVER root,
        so external consumers should prepend the API_SERVER information.

        Examples:
            >>> # NOTE(review): decorated with ``@cached`` like ``contents``,
            >>> # which is used as an attribute elsewhere -- confirm.
            >>> File.create({ 'uri': '/path/to/myfile.txt', 'name': 'myfile' }).linkable_uri
            '/api/files/<:uuid>/static'

        This method is provided to simplify embedding links to ESP files in
        extensions and third-party content. Note that the URL format is
        subject to change in the future.
        """
        return "/api/{}/{}/static".format(self.__api__, self.uuid)