Source code for esp.models.file

# -*- coding: utf-8 -*-
#
# file-related models.
#
# ------------------------------------------------


# imports
# -------
import base64
import datetime
import json
import os

from gems import cached
import six
from six.moves.urllib.parse import urlparse

import esp.base as base
from .__base__ import LinkedModel, BaseModel


# models
# ------
class File(LinkedModel):
    """
    Object for interacting with files from the ESP database.

    See the `Usage <./usage.html>`_ and `Examples <./examples.html>`_ pages
    of the documentation for more context and comprehensive examples of how
    to create and use this type of object.

    Configuration:

        Create file:

        .. code-block:: yaml

            name: My Protocol SOP
            desc: An SOP file
            tags: [sop, protocol]
            uri: relative/path/to/instructions.pdf
            upload: true
            type: PDF

        Create file object with reference to existing file:

        .. code-block:: yaml

            name: Large Sequencing File
            desc: An SOP file
            tags: [sop, protocol]
            uri: file:///path/to/local/file/instructions.bam
            upload: true
            type: raw

        Create file object and register as task output:

        .. code-block:: yaml

            name: SampleSheet.csv
            desc: Illumina Run Sheet
            task_file: Illumina Runsheet
            uri: /path/to/SampleSheet.csv
            upload: false
            type: csv

    Configuration Notes:

        * Due to current backend limitations, `uri` inputs can only take the
          formats: "relative/path", "/absolute/path", and
          "file:///absolute/path".

        * If `upload` is true, uri must resolve to an existing file on the
          local file system, otherwise an error will be raised. If upload is
          False, file will only be registered. Upload defaults to false.

        * The `task_file` parameter is primarily useful when registering
          pipeline output files (since you can pass in the task instance
          UUID).

        * "type" is used internally by ESP and is not necessarily a mime
          type. For instance, the mime type of a bam file is generally
          application/gzip or application/octet-stream, but the type in ESP
          might normally be "bam".

        * mime-type is an acceptable key for specifying the mime-type. If no
          mime-type is provided, the client will attempt to guess the type
          and pass that type along as the content type of the uploaded file.

    Examples:

        .. code-block:: python

            >>> from esp.models import File
            >>> fi = File('My Protocol SOP')
            >>> fi.name, fi.created_at
            ('My Protocol SOP', '2019-06-21T16:04:01.199076Z')

            >>> # show relationships
            >>> fi.path
            '/path/to/Lab7_ESP/Data/files/0000/instructions.pdf.6a523a74-6703-4474-a24e-5a463b9d9770'
            >>> fi.contents
            '... raw file contents ...'
            >>> fi.download('local-copy.pdf')

    Arguments:
        ident (str): Name or uuid for object.
    """

    # API path segment; linkable_uri() below builds "/api/<__api__>/<uuid>/static".
    __api__ = "files"
    __api_cls__ = "Lab7File"
    __allow_update__ = False

    # NOTE: PUSHING META ON FILE OBJECTS NOT SUPPORTED BY BACKEND
    __mutable__ = [x for x in BaseModel.__mutable__ if x != "meta"]

    # Extra keys added on top of the base exportable set.
    __exportable__ = BaseModel.__base_exportable__ + [
        "uri",
        "upload",
        "type",
        "mime-type",
    ]

    # Per-key formatters: "uri" is read from the object's `url` attribute,
    # "upload" is always reported as True, and "mime-type" comes from meta.
    __export_format__ = {
        "uri": lambda x: x.url,
        "upload": lambda x: True,
        "mime-type": lambda x: x.meta.get("mime-type"),
    }

    @classmethod
    def parse_import(cls, config, overwrite=False, allow_snapshot_uuid_remap=False):
        """
        Create new object in ESP database using config file or other data.

        Depending on the ``upload`` entry, the file is either uploaded
        (multipart POST with the file contents) or only registered (JSON
        POST describing the file). Both requests go to ``/api/files``.

        Args:
            config (str, dict, list): Config file or information to use in
                creating new object. This implementation calls dict methods
                on it and modifies it in place (``uri``, ``task_file``,
                ``upload`` and the mime-type keys are popped; ``url``,
                ``meta`` and ``mode`` are set).
            overwrite (bool): Whether or not to delete current entry in the
                ESP database. Not referenced by this implementation.
            allow_snapshot_uuid_remap (bool): Not referenced by this
                implementation.

        Returns:
            File: Object built with ``File.from_data`` from the JSON body of
            the server response.

        Raises:
            ValueError: If ``uri`` is missing, if a ``file`` uri carries a
                network location, if ``task_file`` is given without a task
                dependency and ``LAB7_TASK_UUID`` is not set, or if an
                upload is requested for a non-local or missing file.
        """
        # Imported lazily, as in the original implementation.
        import mimetypes

        # process uri
        uri = config.pop("uri", None)
        if uri is None:
            # Fail with a clear message instead of the opaque TypeError that
            # os.path.expandvars(None) would raise.
            raise ValueError(
                'File config requires a "uri" entry. '
                "Use: relative/path, /absolute/path, or file:///absolute/path."
            )
        parsed_uri = urlparse(os.path.expandvars(uri), scheme="file")
        if parsed_uri.scheme == "file" and parsed_uri.netloc:
            raise ValueError(
                "File uri specified, but not a valid network location. "
                "Use: relative/path, /absolute/path, or file:///absolute/path. "
                "You used: {}".format(uri)
            )
        config["url"] = parsed_uri.geturl()
        path = parsed_uri.path
        config.setdefault("meta", {})

        # add taskfile dependencies
        task_file = config.pop("task_file", None)
        if task_file is not None:
            # add taskfile dep to existing list
            deps = config.get("deps", [])
            task_deps = [x for x in deps if len(x) == 2 and x[1] == "task_file"]
            if not task_deps:
                task_uuid = os.environ.get("LAB7_TASK_UUID", None)
                if not task_uuid:
                    raise ValueError(
                        "Cannot register a task file while no task is running "
                        "unless you explicitly supply the task instance uuid."
                    )
                deps.append([task_uuid, "task_file"])

            # update tags and meta
            tags, meta = config.get("tags", []), config.get("meta", {})
            if "task_file" not in tags:
                tags.append("task_file")
            meta.update({"taskfile_name": task_file})
            config.update({"meta": meta, "deps": deps, "tags": tags})

        # upload file
        upload = config.pop("upload", False)
        if upload:
            if not (parsed_uri.scheme == "" or parsed_uri.scheme == "file"):
                raise ValueError(
                    "To upload a file, the defined URL needs be a local file location "
                    f'("file://..."). Your location is: {path}'
                )

            # check for existing file
            if not os.path.exists(path):
                raise ValueError(f"Cannot upload missing file: {path}")

            # Explicit config key wins, then meta, then a guess from the
            # file name, then a generic binary fallback.
            # TODO: Worth introducing a fallback on magic-string-based
            # approaches?
            mimetype = config.pop("mime-type", config["meta"].get("mime-type"))
            if not mimetype:
                mimetype = mimetypes.guess_type(path)[0] or "application/octet-stream"

            # read file and do upload
            with open(path, "rb") as fi:
                files = {"file": (os.path.basename(path), fi, mimetype)}
                config["mode"] = "upload"
                config["tags"] = json.dumps(config.get("tags", []))
                config["meta"] = json.dumps(config.get("meta", {}))

                # The session-level Content-Type is removed for this request
                # (presumably so the multipart content type can be set for
                # the upload) and always restored afterwards, even when the
                # POST raises, so the shared session is never left altered.
                ctype = base.SESSION.session.headers.pop("Content-Type", None)
                try:
                    result = base.SESSION.post("/api/files", files=files, data=config)
                finally:
                    if ctype is not None:
                        base.SESSION.session.headers["Content-Type"] = ctype
            return File.from_data(result.json())

        # todo: There is a freakish case where we can have a scheme other
        # than file in the URL and it still points to a valid file, e. g.
        # http://something.org/etc/passwd
        # We probably don't care about that at this point, but something to
        # keep in mind.

        # make sure modified_time and created_time are present.
        # note that these are the times of the OS _file_, not the ESP File.
        try:
            stats = os.stat(path)
        except FileNotFoundError:
            # note: no proper cross-platform-compatible way to pick up
            # ctime since on posix-compliant FSs, ctime = mtime. But
            # reasonable default behavior that callers can override by
            # supplying meta.
            ctime = datetime.datetime.now()
            mtime = ctime
            size = 0
            mimetype = "application/octet-stream"
        else:
            ctime = datetime.datetime.fromtimestamp(stats.st_ctime)
            mtime = datetime.datetime.fromtimestamp(stats.st_mtime)
            size = stats.st_size
            mimetype = mimetypes.guess_type(path)[0] or "application/octet-stream"

        # Values already supplied in meta by the caller are never overwritten.
        meta = config["meta"]
        meta.setdefault("modified_time", mtime.isoformat())
        meta.setdefault("created_time", ctime.isoformat())
        meta.setdefault("file_size", size)

        # An explicit mime type in the config overrides the guessed one; both
        # spellings of the key are accepted and removed from the payload.
        if "mime-type" in config:
            mimetype = config.pop("mime-type")
        elif "mime_type" in config:
            mimetype = config.pop("mime_type")
        # NOTE(review): this path stores meta["mime_type"] while the upload
        # path and __export_format__ read meta["mime-type"] -- confirm which
        # key the backend expects.
        meta.setdefault("mime_type", mimetype)
        meta.setdefault("original_filename", os.path.basename(path))

        # do POST and return object
        config["mode"] = "register"
        result = base.SESSION.post("/api/files", json=config)
        return File.from_data(result.json())

    @cached
    def contents(self):
        """
        Return raw file contents (as string).

        Fetches ``/api/files/<uuid>/contents`` and returns the ``data`` entry
        of the JSON response (``None`` when that entry is absent).
        """
        res = base.SESSION.get("/api/files/{}/contents".format(self.uuid))
        return res.json().get("data")

    def download(self, outfile, encoding="UTF-8"):
        """
        Download file and store as output filename.

        Args:
            outfile (str|os.PathLike|file): Path to downloaded file or file
                object to write to. File objects without a string ``mode``
                attribute (e.g. ``io.BytesIO``, ``io.StringIO``) are
                supported: text streams receive decoded text, everything
                else receives the raw bytes.
            encoding (str): If outfile is a file object in string mode, the
                string encoding to use to decode the binary data from the
                server.

        Note:
            Fetches the contents in a single go. For large text files where
            only a subset may be needed, consider using the 'contents'
            endpoint instead. For large, streamable binary files, such as BAM
            files, esp REST APIS have support for ranged data retrieval that
            is not yet supported by the python client.
        """
        import io

        contents = base.SESSION.get("/api/files/{}/download".format(self.uuid)).content

        # Path (string or path-like): open binary and write.
        if isinstance(outfile, six.string_types + (os.PathLike,)):
            with open(outfile, "wb") as file_:
                file_.write(contents)
            return

        # File object: a string ``mode`` decides, exactly as before. When
        # there is no usable mode (missing, or not a string as on
        # gzip.GzipFile), fall back to the stream type.
        mode = getattr(outfile, "mode", None)
        if isinstance(mode, six.string_types):
            text_mode = "b" not in mode
        else:
            text_mode = isinstance(outfile, io.TextIOBase)

        if text_mode:
            # string-mode file object: decode using the provided encoding.
            outfile.write(contents.decode(encoding))
        else:
            # byte-mode file object: write it directly.
            outfile.write(contents)

    @cached
    def linkable_uri(self):
        """
        Return a "linkable" URL. The returned URL will be relative to the
        API_SERVER root, so external consumers should prepend the API_SERVER
        information.

        Examples:
            >>> File.create({
                    'uri': '/path/to/myfile.txt',
                    'name': 'myfile'
                }).linkable_uri()
            /api/files/<:uuid>/static

        This method is provided to simplify embedding links to ESP files in
        extensions and third-party content. Note that the URL format is
        subject to change in the future.

        NOTE(review): this member is wrapped by ``@cached`` like ``contents``,
        which the class examples access without calling -- confirm whether the
        call syntax shown above is correct.
        """
        return "/api/{}/{}/static".format(self.__api__, self.uuid)