Skip to content

Proxy Storage Locations in Synapse

A proxy storage location delegates file access to a proxy server that controls authentication and access to the underlying storage. Synapse stores only the metadata; the proxy server handles the actual file retrieval.

This tutorial demonstrates how to create a proxy storage location, register a file via a ProxyFileHandle, and associate it with a Synapse File entity.

Read more about Custom Storage Locations

Tutorial Purpose

In this tutorial you will:

  1. Set up and get a project
  2. Create a proxy storage location and assign it to a folder
  3. Register a file by creating a ProxyFileHandle via the REST API
  4. Associate the ProxyFileHandle with a Synapse File entity

Prerequisites

1. Set up and get project

import asyncio
import hashlib
import json
import os

import synapseclient
from synapseclient.models import (
    File,
    Folder,
    Project,
    StorageLocation,
    StorageLocationType,
)

# Log in to Synapse. No credentials are passed here, so this presumably relies
# on locally configured credentials — confirm against the synapseclient docs.
syn = synapseclient.login()

# Look up the project by name; its ID is reused below as the folder's parent
# and as the storage location's benefactor.
my_project = Project(name="My uniquely named project about Alzheimer's Disease").get()

2. Create a proxy storage location

Create a StorageLocation of type PROXY, providing your proxy server URL and the shared secret key. Setting benefactor_id to the ID of a project or folder makes the storage location inherit its access control from that entity. Assign the storage location to a folder so that files registered in that folder are served through the proxy.

# Base URL of your proxy server -- replace with your own.
MY_PROXY_URL = "https://my-proxy-server.example.com"

# The shared secret key is read from the MY_PROXY_SECRET_KEY environment
# variable so it never has to be written into this script. The value is None
# when the variable is not set.
MY_PROXY_SECRET_KEY = os.getenv("MY_PROXY_SECRET_KEY")

# Local path of the file to register via the proxy -- replace with your own.
FILE_PATH = "/path/to/your/file.csv"

# Use this when a proxy server controls access to the underlying storage.
# Start with a dedicated folder inside the project.
my_proxy_folder = Folder(
    name="my-folder-for-proxy",
    parent_id=my_project.id,
).store()

# Create the PROXY storage location from the proxy URL and shared secret key.
proxy_storage = StorageLocation(
    storage_type=StorageLocationType.PROXY,
    proxy_url=MY_PROXY_URL,
    secret_key=MY_PROXY_SECRET_KEY,
    benefactor_id=my_project.id,
    description="Proxy-controlled storage",
).store()

# Echo the stored values so the new storage location can be checked at a glance.
print(f"Created proxy storage location: {proxy_storage.storage_location_id}")
print(f"  Proxy URL: {proxy_storage.proxy_url}")
print(f"  Benefactor ID: {proxy_storage.benefactor_id}")

# Assign the storage location to the folder, then read the folder's project
# setting back.
my_proxy_folder.set_storage_location(
    storage_location_id=proxy_storage.storage_location_id
)
my_proxy_folder_storage_location = my_proxy_folder.get_project_setting()
You'll notice the output looks like:
Created proxy storage location: 12345
  Proxy URL: https://my-proxy-server.example.com
  Benefactor ID: syn123456

3. Register a file via ProxyFileHandle

Files in proxy storage are not uploaded through the UI or Python client. Instead, you register a file that already exists on the proxy server by posting a ProxyFileHandle to the Synapse file service. You provide the file's MD5, size, and the relative path used by the proxy to serve it.

# Hash the file in fixed-size chunks so that large files never have to be held
# in memory all at once. MD5 is used only as a content checksum here, hence
# usedforsecurity=False.
md5_hasher = hashlib.md5(usedforsecurity=False)
with open(FILE_PATH, "rb") as f:
    while chunk := f.read(1024 * 1024):
        md5_hasher.update(chunk)
content_md5 = md5_hasher.hexdigest()

# Size in bytes and bare file name, both sent with the ProxyFileHandle request.
file_size = os.path.getsize(FILE_PATH)
file_name = os.path.basename(FILE_PATH)


async def create_proxy_file_handle() -> str:
    """Register the file with Synapse by posting a ProxyFileHandle.

    The request is built from the module-level ``proxy_storage``, ``file_name``,
    ``content_md5`` and ``file_size`` values and is sent to the
    ``/externalFileHandle/proxy`` route of the file handle endpoint.

    Returns:
        The ``id`` value of the file handle in the response.
    """
    handle_request = {
        "concreteType": "org.sagebionetworks.repo.model.file.ProxyFileHandle",
        "storageLocationId": proxy_storage.storage_location_id,
        # The base name doubles as the path the proxy uses to serve the file.
        "filePath": file_name,
        "fileName": file_name,
        "contentType": "text/csv",
        "contentMd5": content_md5,
        "contentSize": file_size,
    }
    created_handle = await syn.rest_post_async(
        "/externalFileHandle/proxy",
        body=json.dumps(handle_request),
        endpoint=syn.fileHandleEndpoint,
    )
    handle_id = created_handle["id"]
    print(f"File handle ID: {handle_id}")
    return handle_id


# Run the coroutine to completion and keep the ID of the new file handle.
proxy_file_handle_id = asyncio.run(create_proxy_file_handle())
You'll notice the output looks like:
File handle ID: 123456

4. Associate the ProxyFileHandle with a File entity

Create a File entity, passing the file handle ID returned above as data_file_handle_id. Synapse stores the metadata and uses the ProxyFileHandle to serve downloads through your proxy server.

# Describe the File entity: it lives in the proxy folder and points at the
# ProxyFileHandle registered above.
proxy_file = File(
    name=file_name,
    parent_id=my_proxy_folder.id,
    data_file_handle_id=proxy_file_handle_id,
)

# Store the entity in Synapse and report its ID.
proxy_file = proxy_file.store()
print(f"Synapse entity: {proxy_file.id}")

Source code for this tutorial

Click to show me
"""Tutorial code for creating a Proxy storage location and uploading a file via ProxyFileHandle."""

import asyncio
import hashlib
import json
import os

import synapseclient
from synapseclient.models import (
    File,
    Folder,
    Project,
    StorageLocation,
    StorageLocationType,
)

# Log in to Synapse. No credentials are passed here, so this presumably relies
# on locally configured credentials — confirm against the synapseclient docs.
syn = synapseclient.login()

# Look up the project by name; its ID is reused below as the folder's parent
# and as the storage location's benefactor.
my_project = Project(name="My uniquely named project about Alzheimer's Disease").get()

# Base URL of your proxy server -- replace with your own.
MY_PROXY_URL = "https://my-proxy-server.example.com"

# The shared secret key is read from the MY_PROXY_SECRET_KEY environment
# variable so it never has to be written into this script. The value is None
# when the variable is not set.
MY_PROXY_SECRET_KEY = os.getenv("MY_PROXY_SECRET_KEY")

# Local path of the file to register via the proxy -- replace with your own.
FILE_PATH = "/path/to/your/file.csv"

# Use this when a proxy server controls access to the underlying storage.
# Start with a dedicated folder inside the project.
my_proxy_folder = Folder(
    name="my-folder-for-proxy",
    parent_id=my_project.id,
).store()

# Create the PROXY storage location from the proxy URL and shared secret key.
proxy_storage = StorageLocation(
    storage_type=StorageLocationType.PROXY,
    proxy_url=MY_PROXY_URL,
    secret_key=MY_PROXY_SECRET_KEY,
    benefactor_id=my_project.id,
    description="Proxy-controlled storage",
).store()

# Echo the stored values so the new storage location can be checked at a glance.
print(f"Created proxy storage location: {proxy_storage.storage_location_id}")
print(f"  Proxy URL: {proxy_storage.proxy_url}")
print(f"  Benefactor ID: {proxy_storage.benefactor_id}")

# Assign the storage location to the folder, then read the folder's project
# setting back.
my_proxy_folder.set_storage_location(
    storage_location_id=proxy_storage.storage_location_id
)
my_proxy_folder_storage_location = my_proxy_folder.get_project_setting()

# Register a file in the proxy storage location by creating a ProxyFileHandle via
# the REST API, then associate it with a Synapse File entity.
#
# Hash the file in fixed-size chunks so that large files never have to be held
# in memory all at once. MD5 is used only as a content checksum here, hence
# usedforsecurity=False.
md5_hasher = hashlib.md5(usedforsecurity=False)
with open(FILE_PATH, "rb") as f:
    while chunk := f.read(1024 * 1024):
        md5_hasher.update(chunk)
content_md5 = md5_hasher.hexdigest()

# Size in bytes and bare file name, both sent with the ProxyFileHandle request.
file_size = os.path.getsize(FILE_PATH)
file_name = os.path.basename(FILE_PATH)


async def create_proxy_file_handle() -> str:
    """Register the file with Synapse by posting a ProxyFileHandle.

    The request is built from the module-level ``proxy_storage``, ``file_name``,
    ``content_md5`` and ``file_size`` values and is sent to the
    ``/externalFileHandle/proxy`` route of the file handle endpoint.

    Returns:
        The ``id`` value of the file handle in the response.
    """
    handle_request = {
        "concreteType": "org.sagebionetworks.repo.model.file.ProxyFileHandle",
        "storageLocationId": proxy_storage.storage_location_id,
        # The base name doubles as the path the proxy uses to serve the file.
        "filePath": file_name,
        "fileName": file_name,
        "contentType": "text/csv",
        "contentMd5": content_md5,
        "contentSize": file_size,
    }
    created_handle = await syn.rest_post_async(
        "/externalFileHandle/proxy",
        body=json.dumps(handle_request),
        endpoint=syn.fileHandleEndpoint,
    )
    handle_id = created_handle["id"]
    print(f"File handle ID: {handle_id}")
    return handle_id


# Run the coroutine to completion and keep the ID of the new file handle.
proxy_file_handle_id = asyncio.run(create_proxy_file_handle())

# Describe the File entity: it lives in the proxy folder and points at the
# ProxyFileHandle registered above.
proxy_file = File(
    name=file_name,
    parent_id=my_proxy_folder.id,
    data_file_handle_id=proxy_file_handle_id,
)

# Store the entity in Synapse and report its ID.
proxy_file = proxy_file.store()
print(f"Synapse entity: {proxy_file.id}")

References used in this tutorial

See also