# Multi-Layer Perceptron (MLP) Example

import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import glob, os
import random

import tensorflow as tf
from tensorflow import keras

# Enable on-demand GPU memory growth for every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except (ValueError, RuntimeError) as err:
    # Invalid device or cannot modify virtual devices once initialized.
    # Best effort only: keep going with TensorFlow's default memory policy.
    print(f'Could not enable GPU memory growth: {err}')
## Build data pipeline

### Input and output variable lists

# Names of the variables selected from the input (mli) files.
vars_mli = [
    'state_t',
    'state_q0001',
    'state_ps',
    'pbuf_SOLIN',
    'pbuf_LHFLX',
    'pbuf_SHFLX',
]

# Names of the variables selected as outputs (mlo).
# 'ptend_t' and 'ptend_q0001' are not read directly: the data generator
# computes them as (mlo state - mli state) / 1200.
vars_mlo = [
    'ptend_t',
    'ptend_q0001',
    'cam_out_NETSW',
    'cam_out_FLWDS',
    'cam_out_PRECSC',
    'cam_out_PRECC',
    'cam_out_SOLS',
    'cam_out_SOLL',
    'cam_out_SOLSD',
    'cam_out_SOLLD',
]

### tf Dataset pipeline

# Normalization / scaling factors, applied inside the data generator as
#   inputs : (x - mli_mean) / (mli_max - mli_min)
#   outputs: y * mlo_scale
# NOTE(review): all four calls open the same bare directory path
# './norm_factors/'; the per-dataset file names appear to be missing from
# these paths -- TODO restore the actual file name for each dataset.
mli_mean = xr.open_dataset('./norm_factors/')
mli_min = xr.open_dataset('./norm_factors/')
mli_max = xr.open_dataset('./norm_factors/')
mlo_scale = xr.open_dataset('./norm_factors/')

def load_nc_dir_with_generator(filelist:list):
    """Build a ``tf.data.Dataset`` that streams (input, output) arrays per file.

    Parameters
    ----------
    filelist : list
        Paths of the input ('.mli.') netCDF files. The matching output file
        is located by replacing '.mli.' with '.mlo.' in each path.

    Returns
    -------
    tf.data.Dataset
        One element per file: a ``(inputs, outputs)`` pair of float64 arrays.
        Each array has a leading 'batch' axis (the stacked 'ncol' dimension)
        and a trailing 'mlvar' axis holding all selected variables.
        Inputs are normalized as (x - mli_mean) / (mli_max - mli_min);
        outputs are multiplied by mlo_scale.
    """
    def gen():
        for file in filelist:
            # Open the input (mli) file and its output (mlo) counterpart; the
            # context managers close both files once the arrays are in memory.
            with xr.open_dataset(file, engine='netcdf4') as ds_in, \
                 xr.open_dataset(file.replace('.mli.', '.mlo.'), engine='netcdf4') as ds_out:
                ds = ds_in[vars_mli]

                # make mlo variables: ptend_t and ptend_q0001
                ds_out['ptend_t'] = (ds_out['state_t'] - ds['state_t'])/1200 # T tendency [K/s]
                ds_out['ptend_q0001'] = (ds_out['state_q0001'] - ds['state_q0001'])/1200 # Q tendency [kg/kg/s]
                dso = ds_out[vars_mlo]

                # normalization, scaling
                ds = (ds - mli_mean)/(mli_max - mli_min)
                dso = dso*mlo_scale

                # stack 'ncol' into a 'batch' axis, then flatten all variables
                # into one 'mlvar' axis. A tuple (not a set) keeps the
                # dimension order well defined if more dims are added here.
                ds = ds.stack({'batch': ('ncol',)})
                ds = ds.to_stacked_array("mlvar", sample_dims=["batch"], name='mli')
                dso = dso.stack({'batch': ('ncol',)})
                dso = dso.to_stacked_array("mlvar", sample_dims=["batch"], name='mlo')

                # Materialize as numpy arrays before the files are closed.
                x, y = ds.values, dso.values
            yield (x, y)

    # The trailing sizes must match the model's input_length (124) and
    # output_length (128); the leading size is the number of columns per file.
    return tf.data.Dataset.from_generator(
        gen,
        output_types=(tf.float64, tf.float64),
        output_shapes=((None, 124), (None, 128)),
    )

### Instantiate object here


# Root directory of the E3SM-MMF files and number of columns per file.
data_root = '/pscratch/sd/s/sungduk/hugging/E3SM-MMF_ne4/train'
n_col = 384

# Local shuffle buffer size (in samples). It is assigned here because the
# pipelines below use it before the training cell assigns the same value.
shuffle_buffer = 12*n_col

# for training

# Alternative selection (disabled): first 5 days of each month for the first 6 years
# f_mli1 = glob.glob(f'{data_root}/*/E3SM-MMF.mli.000[123456]-*-0[12345]-*.nc')
# f_mli2 = glob.glob(f'{data_root}/*/E3SM-MMF.mli.0007-01-0[12345]-*.nc')
# f_mli = [*f_mli1, *f_mli2]

# every 10th sample of years 0001-0006 plus 0007-01
f_mli1 = glob.glob(f'{data_root}/*/E3SM-MMF.mli.000[123456]-*-*-*.nc')
f_mli2 = glob.glob(f'{data_root}/*/E3SM-MMF.mli.0007-01-*-*.nc')
f_mli = sorted([*f_mli1, *f_mli2])
f_mli = f_mli[::10]

# # debugging
# f_mli = f_mli[0:72*5]

print(f'[TRAIN] Total # of input files: {len(f_mli)}')
print(f'[TRAIN] Total # of columns (nfiles * ncols): {len(f_mli)*n_col}')
tds = load_nc_dir_with_generator(f_mli)
tds = tds.unbatch()  # one element per column instead of one per file
tds = tds.shuffle(buffer_size=shuffle_buffer, reshuffle_each_iteration=True)
tds = tds.prefetch(buffer_size=4) # in relation to the batch size

# for validation

# every 10th sample from the first 5 days of each month,
# from 0007-02 through year 0009
f_mli1 = glob.glob(f'{data_root}/*/E3SM-MMF.mli.0007-0[23456789]-0[12345]-*.nc')
f_mli2 = glob.glob(f'{data_root}/*/E3SM-MMF.mli.0007-1[012]-0[12345]-*.nc')
f_mli3 = glob.glob(f'{data_root}/*/E3SM-MMF.mli.000[89]-*-0[12345]-*.nc')
f_mli_val = sorted([*f_mli1, *f_mli2, *f_mli3])
f_mli_val = f_mli_val[::10]

# # debugging
# f_mli_val = f_mli_val[0:72*5]

print(f'[VAL] Total # of input files: {len(f_mli_val)}')
print(f'[VAL] Total # of columns (nfiles * ncols): {len(f_mli_val)*n_col}')
tds_val = load_nc_dir_with_generator(f_mli_val)
tds_val = tds_val.unbatch()  # same per-column elements as the training pipeline
tds_val = tds_val.shuffle(buffer_size=shuffle_buffer, reshuffle_each_iteration=True)
tds_val = tds_val.prefetch(buffer_size=4) # in relation to the batch size

# for count_batch in tds.repeat().batch(10).take(1):
#     print(count_batch[0].numpy())
[TRAIN] Total # of input files: 15768
[TRAIN] Total # of columns (nfiles * ncols): 6054912
[VAL] Total # of input files: 864
[VAL] Total # of columns (nfiles * ncols): 331776

## ML training

  • Although 4 GPUs are available on the node, using multiple GPUs (with the ‘tf.distribute.MirroredStrategy()’ strategy) does not speed up the training process. This is possibly because the current Dataset pipeline is sequential.

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]
# strategy = tf.distribute.MirroredStrategy()
# with strategy.scope():

# model params
# presumably 2 profile variables x 60 levels + 4 scalar inputs -- TODO confirm
input_length = 2*60 + 4
output_length_lin  = 2*60   # outputs that get a linear activation
output_length_relu = 8      # outputs that get a ReLU activation
output_length = output_length_lin + output_length_relu
n_nodes = 512

# construct a model: two ReLU hidden layers, an ELU layer of width
# output_length, then two output heads (linear and ReLU) concatenated
# in that order.
input_layer    = keras.layers.Input(shape=(input_length,), name='input')
hidden_0       = keras.layers.Dense(n_nodes, activation='relu')(input_layer)
hidden_1       = keras.layers.Dense(n_nodes, activation='relu')(hidden_0)
output_pre     = keras.layers.Dense(output_length, activation='elu')(hidden_1)
output_lin     = keras.layers.Dense(output_length_lin,activation='linear')(output_pre)
output_relu    = keras.layers.Dense(output_length_relu,activation='relu')(output_pre)
output_layer   = keras.layers.Concatenate()([output_lin, output_relu])

model = keras.Model(input_layer, output_layer, name='Emulator')

# compile
# NOTE(review): the loss and metrics were reconstructed from the recorded
# training log (loss equals mse in every epoch; mse, mae and accuracy are
# reported) -- confirm against the original notebook.
model.compile(optimizer=keras.optimizers.Adam(),  # default learning rate
              loss='mse',
              metrics=['mse', 'mae', 'accuracy'])
model.summary()
Model: "Emulator"
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 124)]        0           []                               
 dense (Dense)                  (None, 512)          64000       ['input[0][0]']                  
 dense_1 (Dense)                (None, 512)          262656      ['dense[0][0]']                  
 dense_2 (Dense)                (None, 128)          65664       ['dense_1[0][0]']                
 dense_3 (Dense)                (None, 120)          15480       ['dense_2[0][0]']                
 dense_4 (Dense)                (None, 8)            1032        ['dense_2[0][0]']                
 concatenate (Concatenate)      (None, 128)          0           ['dense_3[0][0]',                
Total params: 408,832
Trainable params: 408,832
Non-trainable params: 0
# callbacks
# a. tensorboard
dir_tensorboard = './logs_tensorboard'
os.makedirs(dir_tensorboard, exist_ok=True)
tboard_callback = keras.callbacks.TensorBoard(log_dir = dir_tensorboard,
                                              histogram_freq = 1,)

# b. checkpoint
filepath_checkpoint = 'saved_model/best_model_proto.h5'
os.makedirs(os.path.dirname(filepath_checkpoint), exist_ok=True)
# NOTE(review): only `filepath` is visible in the original call; the remaining
# arguments are an assumption based on the 'best_model' file name (keep the
# model with the lowest validation loss) -- confirm.
checkpoint_callback = keras.callbacks.ModelCheckpoint(filepath=filepath_checkpoint,
                                                      monitor='val_loss',
                                                      mode='min',
                                                      save_best_only=True)

# c. csv logger
# append=True so that repeated fit() calls keep adding rows to the same file.
filepath_csv = 'csv_logger.txt'
csv_callback = keras.callbacks.CSVLogger(filepath_csv, separator=",", append=True)

my_callbacks= [tboard_callback, checkpoint_callback, csv_callback]
# Manually shuffling the order of input files.
# "tds = tds.shuffle(buffer_size=<global>, reshuffle_each_iteration=True)" is possible,
# however, it is slow.
# So employing global shuffle (by file names) + local shuffle (using .shuffle).

shuffle_buffer = 12*384 #ncol=384
batch_size= 96 # 384/4
# NOTE(review): the epoch count is not visible in the original cell; the
# recorded log reaches at least epoch 13. Adjust as needed.
N_EPOCHS = 30

# One fit() call per epoch, so the file order can be reshuffled and the
# input pipelines rebuilt before every pass over the data.
for n in range(N_EPOCHS):
    random.shuffle(f_mli) # global shuffle by file names (in place)
    tds = load_nc_dir_with_generator(f_mli)
    tds = tds.unbatch()
    # local shuffle by elements
    tds = tds.shuffle(buffer_size=shuffle_buffer, reshuffle_each_iteration=False)
    tds = tds.batch(batch_size)
    tds = tds.prefetch(buffer_size=shuffle_buffer // 384) # in relation to the batch size

    tds_val = load_nc_dir_with_generator(f_mli_val)
    tds_val = tds_val.unbatch()
    tds_val = tds_val.shuffle(buffer_size=shuffle_buffer, reshuffle_each_iteration=False)
    tds_val = tds_val.batch(batch_size)
    tds_val = tds_val.prefetch(buffer_size=shuffle_buffer // 384)

    print(f'Epoch: {n+1}')
    model.fit(tds,
              validation_data=tds_val,
              callbacks=my_callbacks)
Epoch: 1
63072/63072 [==============================] - 1096s 17ms/step - loss: 0.0044 - mse: 0.0044 - mae: 0.0254 - accuracy: 0.9727 - val_loss: 0.0041 - val_mse: 0.0041 - val_mae: 0.0244 - val_accuracy: 0.9762
Epoch: 2
63072/63072 [==============================] - 1071s 17ms/step - loss: 0.0041 - mse: 0.0041 - mae: 0.0240 - accuracy: 0.9761 - val_loss: 0.0040 - val_mse: 0.0040 - val_mae: 0.0238 - val_accuracy: 0.9766
Epoch: 3
63072/63072 [==============================] - 1035s 16ms/step - loss: 0.0040 - mse: 0.0040 - mae: 0.0238 - accuracy: 0.9769 - val_loss: 0.0040 - val_mse: 0.0040 - val_mae: 0.0237 - val_accuracy: 0.9774
Epoch: 4
63072/63072 [==============================] - 1035s 16ms/step - loss: 0.0040 - mse: 0.0040 - mae: 0.0236 - accuracy: 0.9773 - val_loss: 0.0040 - val_mse: 0.0040 - val_mae: 0.0239 - val_accuracy: 0.9764
Epoch: 5
63072/63072 [==============================] - 1033s 16ms/step - loss: 0.0039 - mse: 0.0039 - mae: 0.0236 - accuracy: 0.9775 - val_loss: 0.0039 - val_mse: 0.0039 - val_mae: 0.0236 - val_accuracy: 0.9774
Epoch: 6
63072/63072 [==============================] - 1046s 17ms/step - loss: 0.0039 - mse: 0.0039 - mae: 0.0235 - accuracy: 0.9776 - val_loss: 0.0039 - val_mse: 0.0039 - val_mae: 0.0234 - val_accuracy: 0.9780
Epoch: 7
63072/63072 [==============================] - 1033s 16ms/step - loss: 0.0039 - mse: 0.0039 - mae: 0.0235 - accuracy: 0.9778 - val_loss: 0.0039 - val_mse: 0.0039 - val_mae: 0.0235 - val_accuracy: 0.9784
Epoch: 8
63072/63072 [==============================] - 1035s 16ms/step - loss: 0.0039 - mse: 0.0039 - mae: 0.0235 - accuracy: 0.9780 - val_loss: 0.0039 - val_mse: 0.0039 - val_mae: 0.0235 - val_accuracy: 0.9777
Epoch: 9
63072/63072 [==============================] - 1046s 17ms/step - loss: 0.0039 - mse: 0.0039 - mae: 0.0234 - accuracy: 0.9780 - val_loss: 0.0039 - val_mse: 0.0039 - val_mae: 0.0235 - val_accuracy: 0.9762
Epoch: 10
63072/63072 [==============================] - 1049s 17ms/step - loss: 0.0039 - mse: 0.0039 - mae: 0.0234 - accuracy: 0.9782 - val_loss: 0.0039 - val_mse: 0.0039 - val_mae: 0.0235 - val_accuracy: 0.9774
Epoch: 11
63072/63072 [==============================] - 1064s 17ms/step - loss: 0.0038 - mse: 0.0038 - mae: 0.0234 - accuracy: 0.9782 - val_loss: 0.0039 - val_mse: 0.0039 - val_mae: 0.0234 - val_accuracy: 0.9784
Epoch: 12
63072/63072 [==============================] - 1073s 17ms/step - loss: 0.0038 - mse: 0.0038 - mae: 0.0234 - accuracy: 0.9783 - val_loss: 0.0039 - val_mse: 0.0039 - val_mae: 0.0235 - val_accuracy: 0.9786
Epoch: 13
  22445/Unknown - 357s 16ms/step - loss: 0.0038 - mse: 0.0038 - mae: 0.0234 - accuracy: 0.9783