Reader plugin with multiple input files

Devin_Richard_Bayly · October 14, 2022, 3:30am

Hi there,

I have some 3d point cloud time series data that I’m visualizing for a researcher. I previously wrote a programmable source to enable loading their data and assigning each file to a time step. This has worked well for me, but when it was time for them to switch to a file located in a new place they ended up leaving syntax errors in the script as a result of changing the data’s path.

To help paint the picture of what’s been done so far, I’ll include a short screencast and the two scripts used by the programmable source. First, the RequestData script:

def GetUpdateTimestep(algorithm):
    """Look up the time value requested from output port 0.

    Reads UPDATE_TIME_STEP from the output information of the algorithm's
    executive. Returns that value, or None when the key is absent.
    """
    exe = algorithm.GetExecutive()
    port_info = exe.GetOutputInformation(0)
    if not port_info.Has(exe.UPDATE_TIME_STEP()):
        return None
    return port_info.Get(exe.UPDATE_TIME_STEP())

# Body of the Programmable Source 'RequestData Script'.
#
# `self`, `output` and `vtk` are not defined here; the script relies on the
# Programmable Source environment to provide them.
#
# The requested time may not be exactly equal to the timesteps published in
# RequestInformation(), and there may be no request at all. The request is
# therefore truncated to an integer and clamped to a valid file index below.
from pathlib import Path
import numpy as np

from vtk.numpy_interface import algorithms as algs
from vtk.numpy_interface import dataset_adapter as dsa


def srtkey(a):
    """Sort key: order the paths by their string form."""
    return str(a)


pth = Path("/xdisk/chrisreidy/baylyd/Sama_lidar/temp/babel_pcl")
npy_pcls = sorted(pth.rglob("*.npy"), key=srtkey)
if not npy_pcls:
    # Fail with a readable message instead of a bare IndexError further down.
    raise RuntimeError("No .npy files found under %s" % pth)

requested = GetUpdateTimestep(self)
# No explicit request -> first file; otherwise snap into [0, len - 1].
req_time = 0 if requested is None else int(requested)
req_time = min(max(req_time, 0), len(npy_pcls) - 1)

npy_pcl = npy_pcls[req_time]
all_data = np.load(npy_pcl)
data = all_data[:, :3]       # first three columns become the point coordinates
intensity = all_data[:, 3]   # fourth column becomes the "intensity" point array

# make vtk points
pts = vtk.vtkPoints()
pts.SetData(dsa.numpyTovtkDataArray(data, "Points"))

output.SetPoints(pts)
# make single cell that references every point
numpts = pts.GetNumberOfPoints()
ids = vtk.vtkIdList()
ids.SetNumberOfIds(numpts)
for a in range(numpts):
    ids.SetId(a, a)

output.Allocate(1)
output.InsertNextCell(vtk.VTK_POLY_VERTEX, ids)
# add scalar data to output
output.PointData.append(intensity, "intensity")

and the RequestInformation script

# Code for 'RequestInformation Script'.
from pathlib import Path
def setOutputTimesteps(algorithm, timesteps):
    """Publish the available time steps on output port 0 of *algorithm*.

    Any existing TIME_STEPS and TIME_RANGE entries are removed first. Each
    value in *timesteps* is then appended to TIME_STEPS, and the first and
    last values are appended to TIME_RANGE.

    *timesteps* may be any iterable. When it is empty, both keys are left
    cleared and nothing is appended (previously this raised IndexError).
    """
    executive = algorithm.GetExecutive()
    outInfo = executive.GetOutputInformation(0)

    # Clear both keys up front so an early return never leaves stale values.
    outInfo.Remove(executive.TIME_STEPS())
    outInfo.Remove(executive.TIME_RANGE())

    timesteps = list(timesteps)
    if not timesteps:
        return

    for timestep in timesteps:
        outInfo.Append(executive.TIME_STEPS(), timestep)

    outInfo.Append(executive.TIME_RANGE(), timesteps[0])
    outInfo.Append(executive.TIME_RANGE(), timesteps[-1])

# Publish one integer time step per .npy file found (recursively) under the
# data directory: 0, 1, ..., N-1. Only the number of files matters here.
pth = Path("/xdisk/chrisreidy/baylyd/Sama_lidar/temp/babel_pcl")
npy_pcls = list(pth.rglob("*.npy"))
times = list(range(len(npy_pcls)))
print("times are",times)
setOutputTimesteps(self, times)

I want to help prevent this from happening by writing a reader plugin that will let them use the file browser to locate their data instead of editing the programmable source script. My plugin seems to have three issues that I’m struggling to find documented for Python plugins:

how to make a python plugin reader read a folder’s contents
how to use each file in the folder as a time step
my vtkPolyData output’s PointData attribute does not appear to work the same way as in the programmable source code, as evidenced by this error: File "C:\Users\ohmeg\Documents\paraview_learning\my_test.py", line 80, in RequestData output.PointData.append(intensity,"intensity") AttributeError: 'vtkmodules.vtkCommonDataModel.vtkPolyData' object has no attribute 'PointData'

Here’s the python plugin as I have it written so far.


from pathlib import Path
# same imports as earlier.
from vtkmodules.vtkCommonDataModel import vtkDataSet
from vtkmodules.util.vtkAlgorithm import VTKPythonAlgorithmBase
from vtkmodules.numpy_interface import dataset_adapter as dsa
from paraview.util.vtkAlgorithm import *
from paraview.simple import *

# new module for ParaView-specific decorators.

# to add a source, instead of a filter, use the `smproxy.source` decorator.
@smproxy.source(label="Sama Lidar Source")
class SamaLidarSource(VTKPythonAlgorithmBase):
    """Time-aware point-cloud source backed by a folder of ``.npy`` files.

    The user selects one ``.npy`` file. Every ``.npy`` file found recursively
    under that file's parent directory is sorted and exposed as one integer
    time step (0, 1, 2, ...). For the requested step the file is loaded with
    ``numpy.load``; columns 0-2 become the point coordinates and column 3 is
    attached as the "intensity" point array of a single poly-vertex cell.
    """

    def __init__(self):
        VTKPythonAlgorithmBase.__init__(self,
                nInputPorts=0,
                nOutputPorts=1,
                outputType='vtkPolyData')
        self._filename = None
        # Initialised here so pipeline passes that run before a file has been
        # chosen see "no time steps" instead of raising AttributeError.
        self.npy_pcls = []
        self.timesteps = []

    def _GetUpdateTimestep(self):
        """Returns the requested time value, or None if not present"""
        executive = self.GetExecutive()
        outInfo = executive.GetOutputInformation(0)
        if not outInfo.Has(executive.UPDATE_TIME_STEP()):
            return None
        return outInfo.Get(executive.UPDATE_TIME_STEP())

    def setOutputTimesteps(self):
        """Publish ``self.timesteps`` as TIME_STEPS / TIME_RANGE on port 0.

        Both keys are cleared first; with no time steps they stay cleared.
        """
        executive = self.GetExecutive()
        outInfo = executive.GetOutputInformation(0)

        outInfo.Remove(executive.TIME_STEPS())
        outInfo.Remove(executive.TIME_RANGE())
        if not self.timesteps:
            return

        for timestep in self.timesteps:
            outInfo.Append(executive.TIME_STEPS(), timestep)
        outInfo.Append(executive.TIME_RANGE(), self.timesteps[0])
        outInfo.Append(executive.TIME_RANGE(), self.timesteps[-1])

    @smproperty.doublevector(name="TimestepValues", information_only="1", si_class="vtkSITimeStepsProperty")
    def GetTimestepValues(self):
        """Return the list of available time step values."""
        # `timesteps` is a list, not a callable; calling it raised TypeError.
        return self.timesteps

    def RequestInformation(self, request, inInfoVec, outInfoVec):
        """Pipeline pass that advertises the available time steps."""
        self.setOutputTimesteps()
        return 1

    def RequestData(self, request, inInfo, outInfo):
        """Load the file for the requested time step into the output.

        Raises RuntimeError when no ``.npy`` files are known (no file name set
        yet, or the folder is empty).
        """
        import numpy as np
        import vtk
        from vtkmodules.vtkCommonDataModel import vtkPolyData
        from vtk.numpy_interface import dataset_adapter as dsa

        if not self.npy_pcls:
            raise RuntimeError("No .npy files to read; set FileName first")

        # The request may be missing or may not match a published step:
        # default to the first file, truncate to int, clamp to a valid index.
        requested = self._GetUpdateTimestep()
        index = 0 if requested is None else int(requested)
        index = min(max(index, 0), len(self.npy_pcls) - 1)

        poly = vtkPolyData.GetData(outInfo, 0)
        all_data = np.load(self.npy_pcls[index])
        data = all_data[:, :3]
        intensity = all_data[:, 3]

        # make vtk points
        pts = vtk.vtkPoints()
        pts.SetData(dsa.numpyTovtkDataArray(data, "Points"))
        poly.SetPoints(pts)

        # make single cell that references every point
        numpts = pts.GetNumberOfPoints()
        ids = vtk.vtkIdList()
        ids.SetNumberOfIds(numpts)
        for a in range(numpts):
            ids.SetId(a, a)

        poly.Allocate(1)
        poly.InsertNextCell(vtk.VTK_POLY_VERTEX, ids)

        # A raw vtkPolyData has no `PointData` attribute; that numpy-friendly
        # API only exists on the dataset_adapter wrapper.
        dsa.WrapDataObject(poly).PointData.append(intensity, "intensity")

        # Record which time step this output actually represents.
        poly.GetInformation().Set(poly.DATA_TIME_STEP(), float(index))
        return 1

    def do_step(self):
        print("got called to change for some reason")

    @smproperty.stringvector(name="FileName")
    @smdomain.filelist()
    @smhint.filechooser(extensions="npy", file_description="Numpy pcd")
    def SetFileName(self, name):
        """Specify one file of the series; its folder supplies the time steps.

        All ``.npy`` files found recursively under the parent directory of
        *name* are collected and sorted, one time step per file. A None or
        empty name, or a name equal to the current one, is ignored.
        """
        if not name or name == self._filename:
            return
        self._filename = name
        self.npy_pcls = sorted(Path(name).parent.rglob("*.npy"))
        self.timesteps = list(range(len(self.npy_pcls)))
        # Let the pipeline re-run RequestInformation/RequestData rather than
        # editing the output information outside of a pipeline pass.
        self.Modified()

Using information from the standard examples I’ve been reading (1, 2), I came up with a workaround: I select a single file from the series and then use that file’s path to determine the parent folder, which I can then query for the paths of all the individual files. This is a pretty unfortunate hack, and I was excited when I found a similar question on this forum that seems related to issue 1. Unfortunately, I’m not sure how to decipher the relevant parts of the AMReX reader XML.

I’m pretty much lost with regards to how I need to tackle issues 2 & 3 though. Any help is greatly appreciated!

Devin_Richard_Bayly · October 17, 2022, 5:58pm

I went with a workaround that appears to be all I need for the moment. The Python algorithm examples include a nice table generator that works with time, so I overhauled that and added a “Table To Points” filter. Here’s the plugin in case it’s helpful for people.

"""This module demonstrates various ways of adding
VTKPythonAlgorithmBase subclasses as filters, sources, readers,
and writers in ParaView"""


# This is module to import. It provides VTKPythonAlgorithmBase, the base class
# for all python-based vtkAlgorithm subclasses in VTK and decorators used to
# 'register' the algorithm with ParaView along with information about UI.
from paraview.util.vtkAlgorithm import *
import uuid
from pathlib import Path



#------------------------------------------------------------------------------
# A reader example.
#------------------------------------------------------------------------------
def createModifiedCallback(anobject):
    """Build a callback that calls ``Modified()`` on *anobject*.

    Only a weak reference to *anobject* is kept. Once the object is gone the
    returned callback does nothing. The callback accepts and ignores any
    positional and keyword arguments.
    """
    import weakref
    target_ref = weakref.ref(anobject)
    # Drop the local strong reference; only the weak one is kept.
    anobject = None

    def _markmodified(*args, **kwargs):
        target = target_ref()
        if target is None:
            return
        target.Modified()

    return _markmodified

# To add a reader, we can use the following decorators
#   @smproxy.source(name="PythonCSVReader", label="Python-based CSV Reader")
#   @smhint.xml("""<ReaderFactory extensions="csv" file_description="Numpy CSV files" />""")
# or directly use the "@reader" decorator.
# NOTE(review): the proxy name, label and file description still say "CSV"
# although this reader loads .npy files. They are left unchanged here so that
# the registered names stay stable; consider renaming them deliberately.
@smproxy.reader(name="PythonCSVReader", label="Python-based CSV Reader",
                extensions="npy",
                 file_description="CSV files")
class PythonCSVReader(VTKPythonAlgorithmBase):
    """A reader that turns a folder of ``.npy`` files into a temporal table.

    The selected file's parent directory is searched recursively for ``.npy``
    files. The sorted files become integer time steps 0, 1, 2, ... For the
    requested step the file is loaded with ``numpy.load`` and its first four
    columns are written to the output vtkTable as "x", "y", "z" and
    "intensity".
    """
    def __init__(self):
        VTKPythonAlgorithmBase.__init__(self, nInputPorts=0, nOutputPorts=1, outputType='vtkTable')
        self._filename = None
        self._ndata = None       # most recently loaded array, if any
        self._timesteps = None   # None means "folder not scanned yet"
        self.pth = None
        self.fnames = []

        from vtkmodules.vtkCommonCore import vtkDataArraySelection
        # NOTE(review): the selection is exposed as the "Arrays" property but
        # RequestData does not consult it; all four columns are always output.
        self._arrayselection = vtkDataArraySelection()
        self._arrayselection.AddObserver("ModifiedEvent", createModifiedCallback(self))

    def _scan_files(self):
        """Collect the .npy files next to the selected file (once per file name).

        Raises RuntimeError when no file name has been set.
        """
        if self._timesteps is not None:
            return
        if not self._filename:
            # Note, exceptions are totally fine!
            raise RuntimeError("No filename specified")
        self.pth = Path(self._filename)
        # "*.npy" rather than "*npy": only match the real extension.
        self.fnames = sorted(self.pth.parent.rglob("*.npy"))
        self._timesteps = list(range(len(self.fnames)))

    def _get_raw_data(self, requested_time=None):
        """Return the array for *requested_time*.

        With ``requested_time=None`` no file is read and the most recently
        loaded array (or None) is returned; the folder is still scanned.
        """
        import numpy
        self._scan_files()
        if requested_time is None:
            return self._ndata
        # load the specific npy file for this time step
        self._ndata = numpy.load(self.fnames[int(requested_time)])
        return self._ndata

    def _get_timesteps(self):
        """Return the list of time steps, scanning the folder if needed."""
        self._scan_files()
        return self._timesteps

    def _get_update_time(self, outInfo):
        """Map the pipeline's requested time onto an available time step.

        Returns None when there are no time steps, the first step when no
        time was requested, and otherwise the last step that is not greater
        than the request (or the first step if the request precedes them all).
        """
        executive = self.GetExecutive()
        timesteps = self._get_timesteps()
        if not timesteps:
            return None
        if not outInfo.Has(executive.UPDATE_TIME_STEP()):
            return timesteps[0]

        utime = outInfo.Get(executive.UPDATE_TIME_STEP())
        dtime = timesteps[0]
        for atime in timesteps:
            if atime > utime:
                break
            dtime = atime
        return dtime

    def _get_array_selection(self):
        return self._arrayselection

    @smproperty.stringvector(name="FileName")
    @smdomain.filelist()
    @smhint.filechooser(extensions="npy", file_description="Numpy CSV files")
    def SetFileName(self, name):
        """Specify filename for the file to read."""
        if self._filename != name:
            self._filename = name
            # Forget cached data and force a rescan of the new folder.
            self._ndata = None
            self._timesteps = None
            self.Modified()

    @smproperty.doublevector(name="TimestepValues", information_only="1", si_class="vtkSITimeStepsProperty")
    def GetTimestepValues(self):
        return self._get_timesteps()

    # Array selection API is typical with readers in VTK
    # This is intended to allow ability for users to choose which arrays to
    # load. To expose that in ParaView, simply use the
    # smproperty.dataarrayselection().
    # This method **must** return a `vtkDataArraySelection` instance.
    @smproperty.dataarrayselection(name="Arrays")
    def GetDataArraySelection(self):
        return self._get_array_selection()

    def RequestInformation(self, request, inInfoVec, outInfoVec):
        """Advertise the available time steps and their range."""
        executive = self.GetExecutive()
        outInfo = outInfoVec.GetInformationObject(0)
        outInfo.Remove(executive.TIME_STEPS())
        outInfo.Remove(executive.TIME_RANGE())

        timesteps = self._get_timesteps()
        # An empty list (no files found) must not be indexed.
        if timesteps:
            for t in timesteps:
                outInfo.Append(executive.TIME_STEPS(), t)
            outInfo.Append(executive.TIME_RANGE(), timesteps[0])
            outInfo.Append(executive.TIME_RANGE(), timesteps[-1])
        return 1

    def RequestData(self, request, inInfoVec, outInfoVec):
        """Fill the output table from the file of the requested time step.

        Raises RuntimeError when no file name is set or no .npy files exist.
        """
        from vtkmodules.vtkCommonDataModel import vtkTable
        from vtkmodules.numpy_interface import dataset_adapter as dsa

        data_time = self._get_update_time(outInfoVec.GetInformationObject(0))
        if data_time is None:
            raise RuntimeError("No .npy files found next to %s" % self._filename)

        raw_data = self._get_raw_data(data_time)
        output = dsa.WrapDataObject(vtkTable.GetData(outInfoVec, 0))
        for column, name in enumerate(("x", "y", "z", "intensity")):
            output.RowData.append(raw_data[:, column], name)

        output.GetInformation().Set(output.DATA_TIME_STEP(), data_time)
        return 1