Data Import and Export

AI-Generated Documentation

This documentation was generated with assistance from AI. While we strive for accuracy, errors may be present. If you find issues, unclear explanations, or have suggestions for improvement, please report them on GitHub.

Ossify provides multiple ways to load and save cellular morphology data, from native ossify formats to external sources like CAVEclient and legacy MeshWork files. This guide covers all the import and export options available.

Native Ossify Format

Saving Cells

The native .osy format preserves all ossify data structures, including layers, annotations, linkages, and metadata.

import ossify

# Save to current directory with automatic naming
cell = ossify.Cell(name="my_neuron")
# ... add layers and annotations ...

ossify.save_cell(cell)  # Creates "my_neuron.osy"

# Save with explicit path
ossify.save_cell(cell, "path/to/my_cell.osy")

# Save with overwrite protection
ossify.save_cell(cell, "existing_file.osy", allow_overwrite=False)  # Raises error if exists
ossify.save_cell(cell, "existing_file.osy", allow_overwrite=True)   # Overwrites existing

# Save to file object
with open("my_cell.osy", "wb") as f:
    ossify.save_cell(cell, f)

Loading Cells

# Load from file path
cell = ossify.load_cell("path/to/my_cell.osy")

# Load from file object
with open("my_cell.osy", "rb") as f:
    cell = ossify.load_cell(f)

# Inspect loaded cell
cell.describe()
print(f"Loaded cell '{cell.name}' with {len(cell.layers.names)} layers")

Cloud Storage Support

Ossify supports cloud storage through the cloudfiles library:

# Save to cloud storage
ossify.save_cell(cell, "gs://my-bucket/cells/neuron_001.osy")  # Google Cloud
ossify.save_cell(cell, "s3://my-bucket/cells/neuron_001.osy")  # AWS S3

# Load from cloud storage
cell = ossify.load_cell("gs://my-bucket/cells/neuron_001.osy")
cell = ossify.load_cell("s3://my-bucket/cells/neuron_001.osy")

# Also supports local file:// URLs
cell = ossify.load_cell("file:///absolute/path/to/cell.osy")

Advanced File Management

# Using CellFiles for advanced operations
from ossify import CellFiles

# Initialize file manager
cf = CellFiles("path/to/directory")  # Local directory
cf = CellFiles("gs://my-bucket/cells")  # Cloud storage

# Check if writable
print(f"Can save: {cf.saveable}")
print(f"Is remote: {cf.remote}")

# Save and load with manager
cf.save(cell, "neuron_001.osy", allow_overwrite=True)
loaded_cell = cf.load("neuron_001.osy")

CAVEclient Integration

Load cells directly from connectomics databases using CAVEclient:

Basic CAVEclient Import

from caveclient import CAVEclient
import ossify

# Initialize client (replace with your datastack)
client = CAVEclient("minnie65_public")

# Load basic cell (skeleton + L2 graph)
root_id = 864691135639806264  # Example root ID
cell = ossify.load_cell_from_client(
    root_id=root_id,
    client=client
)

print(f"Loaded cell {cell.name}")
print(f"Skeleton: {cell.skeleton.n_vertices} vertices")
print(f"Graph: {cell.graph.n_vertices} L2 vertices")

CAVEclient Import with Synapses

# Load with synapse data
cell = ossify.load_cell_from_client(
    root_id=root_id,
    client=client,
    synapses=True,                    # Include synapse annotations
    include_partner_root_id=True,     # Include partner neuron IDs
    omit_self_synapses=True,          # Remove autapses (usually artifacts)
)

print(f"Pre-synaptic sites: {len(cell.annotations.pre_syn.vertices)}")
print(f"Post-synaptic sites: {len(cell.annotations.post_syn.vertices)}")

# Access synapse metadata
pre_synapses = cell.annotations.pre_syn
partner_ids = pre_synapses.get_feature("post_pt_root_id") if "post_pt_root_id" in pre_synapses.feature_names else None

CAVEclient Import Options

import datetime

# Specific timestamp for consistency
timestamp = datetime.datetime(2024, 1, 15, 12, 0, 0)

cell = ossify.load_cell_from_client(
    root_id=root_id,
    client=client,
    synapses=True,
    restore_graph=True,               # Include all L2 graph edges (slower)
    restore_properties=True,          # Include all L2 vertex properties
    synapse_spatial_point="ctr_pt_position",  # Synapse coordinate column
    timestamp=timestamp,              # Specific time point
    skeleton_version=4,               # Skeleton service version
)

# Check what was loaded
print(f"L2 graph edges: {len(cell.graph.edges) if cell.graph else 0}")
print(f"L2 vertex properties: {cell.graph.feature_names if cell.graph else []}")
print(f"Skeleton features: {cell.skeleton.feature_names if cell.skeleton else []}")

Working with CAVEclient Data

# CAVEclient loads create specific structure:
# - cell.graph: L2 spatial graph with coordinates in nanometers
# - cell.skeleton: Skeleton with radius and compartment features
# - cell.annotations.pre_syn/post_syn: Synaptic sites (if synapses=True)

# L2 graph coordinates are in nanometers
if cell.graph:
    l2_coords = cell.graph.vertices
    print(f"L2 coordinate range (nm): {l2_coords.min(axis=0)} to {l2_coords.max(axis=0)}")

# Skeleton coordinates are also in nanometers
if cell.skeleton:
    skel_coords = cell.skeleton.vertices  
    print(f"Skeleton coordinate range (nm): {skel_coords.min(axis=0)} to {skel_coords.max(axis=0)}")

    # Check for radius and compartment info
    if "radius" in cell.skeleton.feature_names:
        radius = cell.skeleton.get_feature("radius")
        print(f"Radius range: {radius.min():.2f} - {radius.max():.2f}")

    if "compartment" in cell.skeleton.feature_names:
        compartment = cell.skeleton.get_feature("compartment")
        print(f"Compartments: {np.unique(compartment)}")

Legacy MeshWork Import

Import from legacy MeshWork .h5 files (requires h5py):

Basic MeshWork Import

# Import legacy MeshWork file
cell, node_mask = ossify.import_legacy_meshwork(
    "path/to/meshwork_file.h5",
    l2_skeleton=True,        # Import mesh as L2 graph (True) or mesh layer (False)
    as_pcg_skel=False       # Process PCG skeleton annotations (False for raw import)
)

print(f"Imported cell: {cell.name}")
print(f"Node mask shape: {node_mask.shape}")
print(f"Layers: {cell.layers.names}")
print(f"Annotations: {cell.annotations.names}")

# The node_mask indicates which mesh vertices correspond to skeleton nodes
# It's not automatically applied - you can apply it manually if needed
if cell.graph and len(node_mask) == cell.graph.n_vertices:
    # Apply mask to keep only skeleton-corresponding vertices
    masked_graph = cell.graph.apply_mask(node_mask, as_positional=True)
    print(f"Masked graph: {masked_graph.n_vertices} vertices")

MeshWork Import Options

# Import as mesh layer instead of L2 graph
cell, node_mask = ossify.import_legacy_meshwork(
    "meshwork_file.h5",
    l2_skeleton=False,       # Import mesh data as actual mesh layer
    as_pcg_skel=False
)

print(f"Has mesh: {cell.mesh is not None}")
if cell.mesh:
    print(f"Mesh: {cell.mesh.n_vertices} vertices, {len(cell.mesh.faces)} faces")

# Process PCG skeleton annotations automatically
cell, node_mask = ossify.import_legacy_meshwork(
    "meshwork_file.h5", 
    l2_skeleton=True,
    as_pcg_skel=True        # Automatically process segment properties, etc.
)

# PCG processing moves annotation data to layer features
print(f"Skeleton features after PCG processing: {cell.skeleton.feature_names}")
print(f"Graph features after PCG processing: {cell.graph.feature_names if cell.graph else []}")

Understanding MeshWork Structure

# MeshWork files typically contain:
# - mesh: 3D mesh data (vertices, faces, link_edges)
# - skeleton: Tree skeleton mapped to mesh
# - annotations: Various data tables

# After import:
cell.describe()

# Check linkages between layers
if cell.skeleton and cell.graph:
    # Skeleton is linked to graph/mesh
    skeleton_to_graph = cell.skeleton.map_index_to_layer("graph", as_positional=False)
    print(f"Skeleton-graph mappings: {len(skeleton_to_graph)}")

# Annotations may be linked to mesh/graph or skeleton
for anno_name in cell.annotations.names:
    anno = cell.annotations[anno_name]
    print(f"Annotation {anno_name}: {anno.n_vertices} points")

Working with External Formats

Exporting to External Libraries

# Export mesh to trimesh
if cell.mesh:
    tmesh = cell.mesh.as_trimesh
    # Can then save to various formats via trimesh
    tmesh.export("output.ply")
    tmesh.export("output.obj")

# Export skeleton to NetworkX
if cell.skeleton:
    import networkx as nx
    G = nx.from_edgelist(cell.skeleton.edges)

    # Add vertex positions as node attributes
    pos_dict = {vid: cell.skeleton.vertices[i] for i, vid in enumerate(cell.skeleton.vertex_index)}
    nx.set_node_attributes(G, pos_dict, 'pos')

    # Save as GraphML
    nx.write_graphml(G, "skeleton.graphml")

# Export to pandas/CSV
skeleton_data = cell.skeleton.nodes  # DataFrame with coordinates + features
skeleton_data.to_csv("skeleton_data.csv")

annotation_data = cell.annotations.synapses.nodes if "synapses" in cell.annotations.names else None
if annotation_data is not None:
    annotation_data.to_csv("synapse_data.csv")

Creating Cells from External Data

import pandas as pd
import numpy as np

# Load from CSV/pandas
vertex_data = pd.read_csv("neuron_vertices.csv")
edge_data = pd.read_csv("neuron_edges.csv")

# Create cell from DataFrames
cell = ossify.Cell(name="imported_neuron")

# Add skeleton from DataFrame
cell.add_skeleton(
    vertices=vertex_data,
    edges=edge_data[["source", "target"]].values,
    root=vertex_data.iloc[0]["vertex_id"],  # First vertex as root
    spatial_columns=["x", "y", "z"],
    vertex_index="vertex_id",
    features={
        "radius": "radius_um", 
        "compartment": "compartment_type"
    }
)

# Add annotations from CSV
synapse_data = pd.read_csv("synapses.csv")
cell.add_point_annotations(
    name="synapses",
    vertices=synapse_data,
    spatial_columns=["pos_x", "pos_y", "pos_z"],
    features={"confidence": "conf_score", "type": "synapse_type"}
)

Coordinate System Handling

# Handle different coordinate systems and units
def convert_coordinates(vertices, scale_factor=1000, offset=[0, 0, 0]):
    """Convert coordinates (e.g., micrometers to nanometers)."""
    vertices_scaled = vertices * scale_factor
    vertices_offset = vertices_scaled + offset
    return vertices_offset

# Example: convert from nanometers to micrometers and back
if cell.skeleton:
    # Convert entire cell from nm to μm (all layers and annotations)
    cell_um = cell.transform(lambda x: x / 1000)

    # Or apply more complex transformation
    def complex_transform(coords):
        """Custom transformation: scale and translate."""
        scaled = coords * 0.001  # nm to μm
        translated = scaled + [100, 100, 0]  # Add offset
        return translated

    converted_cell = cell.transform(complex_transform)
    converted_cell.name = f"{cell.name}_converted"

Batch Processing

Processing Multiple Files

import os
from pathlib import Path

def process_cell_directory(input_dir, output_dir, file_pattern="*.h5"):
    """Process all MeshWork files in a directory."""
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)

    for meshwork_file in input_path.glob(file_pattern):
        print(f"Processing {meshwork_file.name}...")

        try:
            # Import MeshWork
            cell, mask = ossify.import_legacy_meshwork(
                str(meshwork_file),
                l2_skeleton=True,
                as_pcg_skel=True
            )

            # Save as ossify format
            output_file = output_path / f"{meshwork_file.stem}.osy"
            ossify.save_cell(cell, str(output_file))

            print(f"  Saved: {output_file.name}")

        except Exception as e:
            print(f"  Error processing {meshwork_file.name}: {e}")

# Process directory
process_cell_directory("legacy_files/", "ossify_files/", "*.h5")

Batch CAVEclient Downloads

def download_multiple_cells(root_ids, client, output_dir):
    """Download multiple cells from CAVEclient."""
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)

    for root_id in root_ids:
        print(f"Downloading {root_id}...")

        try:
            cell = ossify.load_cell_from_client(
                root_id=root_id,
                client=client,
                synapses=True
            )

            # Save with root_id as filename
            output_file = output_path / f"cell_{root_id}.osy"
            ossify.save_cell(cell, str(output_file))

            print(f"  Saved: {output_file.name}")

        except Exception as e:
            print(f"  Error downloading {root_id}: {e}")

# Download list of cells
root_ids = [864691135639806264, 864691135639806265, 864691135639806266]
download_multiple_cells(root_ids, client, "downloaded_cells/")

File Format Details

Ossify Format Structure

The .osy format is a compressed tar archive containing:

cell.osy/
├── metadata.json              # Cell metadata and structure
├── layers/
│   ├── skeleton/
│   │   ├── meta.json         # Layer metadata
│   │   ├── nodes.feather     # Vertex data
│   │   ├── edges.npz         # Edge connectivity
│   │   └── base_properties/  # Cached properties
│   ├── mesh/
│   │   ├── meta.json
│   │   ├── nodes.feather
│   │   └── faces.npz
│   └── graph/
│       ├── meta.json
│       ├── nodes.feather
│       └── edges.npz
├── annotations/
│   ├── synapses/
│   │   ├── meta.json
│   │   └── nodes.feather
│   └── spines/
│       ├── meta.json
│       └── nodes.feather
└── linkage/
    ├── skeleton/mesh/
    │   └── linkage.feather
    └── synapses/skeleton/
        └── linkage.feather

Compression and Performance

# Ossify files use efficient compression
# - Feather format for DataFrames (fast, compact)
# - NPZ format for arrays (compressed numpy)
# - JSON for metadata (human readable)

# Check file sizes
import os
original_size = os.path.getsize("cell.osy")
print(f"Compressed cell file: {original_size / 1024 / 1024:.1f} MB")

# Files are optimized for loading speed and storage efficiency

Key Import/Export Methods

Native Format

ossify.save_cell(cell, file=None, allow_overwrite=False) - Save to ossify format
ossify.load_cell(source) - Load from ossify format
CellFiles(path) - Advanced file management

CAVEclient Integration

ossify.load_cell_from_client(root_id, client, synapses=False, restore_graph=False, ...) - Load from CAVE

Legacy MeshWork

ossify.import_legacy_meshwork(filename, l2_skeleton=True, as_pcg_skel=False) - Import MeshWork files

External Format Support

mesh.as_trimesh - Export mesh to trimesh library
skeleton.nodes.to_csv() - Export to CSV
NetworkX integration for skeleton graphs

Best Practices

Use ossify native format for long-term storage and performance
Set explicit timestamps when using CAVEclient for reproducibility
Apply node masks from MeshWork import based on your analysis needs
Use cloud storage URLs for large-scale collaborative projects
Batch process files with error handling for robustness