Automatic grouping of files with Python

Dear users,

from a parallel simulation with many partitions I get a separate .vtu file from each processor and for each time step. My goal is to automate the generation of .vtk multiblock files (so basically grouping my files into the different time steps).

All files have the naming convention:

name_PPPPTTTTT.vtu

with P as the (zero padded) partition number and T as the (zero padded) timestep number.

Is there a simple way of achieving this grouping?

So far I have been able to load all of the files into an array, but the grouping did not work as expected.

from paraview.simple import *
import fnmatch
import os

# from https://stackoverflow.com/questions/431684/how-do-i-change-the-working-directory-in-python/13197763#13197763
class cd:
    """Context manager that temporarily switches the working directory.

    On entry the current directory is remembered and the process chdirs
    into *newPath* (with ``~`` expanded); on exit the remembered
    directory is restored, even if the body raised.
    """

    def __init__(self, newPath):
        # Expand a leading '~' so user-relative paths work too.
        self._target = os.path.expanduser(newPath)

    def __enter__(self):
        self._origin = os.getcwd()
        os.chdir(self._target)

    def __exit__(self, etype, value, traceback):
        # Always restore, regardless of any exception in the with-body.
        os.chdir(self._origin)

def getFiles(directory, pattern="P*.vtu"):
    """Return the entry names in *directory* that match the glob *pattern*.

    NOTE(review): the default pattern only matches names starting with a
    literal 'P'; files following the stated name_PPPPTTTTT.vtu convention
    would need a pattern like '*.vtu' instead -- confirm against the real
    file names.
    """
    entries = os.listdir(directory)
    return fnmatch.filter(entries, pattern)

def getFileIndizes(directory):
    """Return the PPPPTTTTT index substring of every matching file name.

    Assumes names look like ``name_PPPPTTTTT.vtu``: the part after the
    first underscore and before the first dot is the combined index.
    """
    indizes = []
    for name in getFiles(directory):
        afterUnderscore = name.split("_")[1]
        indizes.append(afterUnderscore.split(".")[0])
    return indizes

def getPartitionList(directory):
    """Return the distinct partition numbers found in *directory*.

    The partition number is the first four digits of each PPPPTTTTT index.
    """
    partitions = {int(index[:4]) for index in getFileIndizes(directory)}
    return list(partitions)

def getTimeStepList(directory):
    """Return the distinct time-step numbers found in *directory*.

    The time-step number is everything after the first four digits of
    each PPPPTTTTT index.
    """
    steps = {int(index[4:]) for index in getFileIndizes(directory)}
    return list(steps)

def loadFile(fileName):
    """Create a ParaView XML unstructured-grid reader for *fileName*."""
    reader = XMLUnstructuredGridReader(FileName=fileName)
    return reader


if __name__ == "__main__":

    directory = os.path.join('.', 'Paraview')

    # Build one list of readers per time step:
    # [ [p1t1, p2t1, p3t1, ...], [p1t2, p2t2, p3t2, ...], ... ]
    # (Replaces the original [[]] + append + pop(-1) dance.)
    loadedFiles = []
    for time in getTimeStepList(directory):
        readersForStep = []
        for partition in getPartitionList(directory):
            # The stated convention is name_PPPPTTTTT.vtu, so match on the
            # underscore-separated index; the original "P*_..." pattern only
            # matched file names starting with a literal 'P'.
            currentFile = fnmatch.filter(
                os.listdir(directory),
                f"*_{partition:04d}{time:05d}.vtu")
            # chdir so the reader gets paths relative to the data directory.
            with cd(directory):
                readersForStep.append(loadFile(currentFile))
        loadedFiles.append(readersForStep)

    # One multiblock group per time step.
    groups = [GroupDatasets(Input=timeStep) for timeStep in loadedFiles]

    # Write each time step to its own .vtm file; the original wrote every
    # group to the same 'output.vtm', so each step overwrote the previous.
    for tIndex, group in enumerate(groups):
        SaveData(
            os.path.join(directory, f'output_{tIndex:05d}.vtm'),
            proxy=group,
            PointDataArrays=['Displacements', 'Principal Stress', 'Stress/Strain'],
            Writetimestepsasfileseries=1,
        )

Grouping works when you pass the reader a list of path strings.

Set up a reader:

# Hand the whole list of file paths to a single reader, then display it.
reader = OpenDataFile(list_of_path_strings)
Show(reader)
Render()

I find it convenient to:

from pathlib import Path

path_str = '/homes/user_name/data'
path = Path(path_str)

# Path.glob already applies the wildcard pattern, so no extra filtering is
# needed.  The original expression
#     [f for f in all_files if f.stem.find('P*.vtu') > -1]
# did a literal substring search for "P*.vtu" (str.find does no globbing),
# and Path.stem never contains the ".vtu" suffix anyway, so it always
# produced an empty list.  Convert to str to match the variable name and
# what OpenDataFile expects.
list_of_path_strings = [str(f) for f in path.glob('P*.vtu')]