92 changes: 92 additions & 0 deletions scripts/builtin/autoGradientCheck.dml
@@ -0,0 +1,92 @@
# autoGradientCheck.dml
# Usage:
#   systemds autoGradientCheck.dml -exec singlenode -nvargs X=... H1=16 H2=8 H3=4 LAYER=1 R=1 C=1 EPS=1e-4
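# Example invocation (hypothetical paths/values, for illustration only):
#   systemds autoGradientCheck.dml -exec singlenode \
#     -nvargs X=data/X.csv H1=16 H2=8 H3=4 LAYER=2 R=3 C=5 EPS=1e-4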

source("scripts/builtin/autoencoder_2layer.dml") as ae

X = read($X)
n = nrow(X)
m = ncol(X)

# ---- hidden layers list (same convention as wrapper)
hidden_layers = list(as.integer($H1))
if(as.integer($H2) > 0)
hidden_layers = append(hidden_layers, list(as.integer($H2)))
if(as.integer($H3) > 0)
hidden_layers = append(hidden_layers, list(as.integer($H3)))

# ---- pseudo-random permutation of the input rows
# For strict determinism, pass an explicit seed to Rand (e.g., seed=42).
order_rand = Rand(rows=n, cols=1, min=0, max=1, pdf="uniform")
permut = table(
  seq(1, n, 1),
  order(target=order_rand, by=1, index.return=TRUE),
  n, n
)
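# Note: order(..., index.return=TRUE) returns p where p[i] is the index of the
# i-th smallest random draw; table(i, p[i]) puts a 1 at (i, p[i]), so
# permut %*% X moves row p[i] of X into row i, i.e., applies the permutation.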

X_pre = permut %*% X
means = colSums(X_pre)/n
stds = sqrt((colSums(X_pre^2)/n - means*means)*n/(n-1)) + 1e-17
X_pre = (X_pre - means)/stds
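# Columns now have zero mean and unit sample standard deviation; the 1e-17
# added to stds above guards against division by zero for constant columns.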

# ---- init model
layer_sizes = ae::build_layer_sizes(m, hidden_layers)
layer_count = length(layer_sizes) - 1
[W_list0, b_list0, updW0, updb0] = ae::init_layers(layer_sizes)

# ---- forward + backward (analytic grad)
[act0, primes0, Yhat0, E0] = ae::forward_pass_layers(X_pre, W_list0, b_list0, X_pre)
[grads_W, grads_b] = ae::backward_pass_layers(X_pre, act0, primes0, W_list0, E0)

# grads_W is stored in reverse order: grads_W[1] corresponds to last layer
target_layer = as.integer($LAYER) # 1..layer_count (1=first layer)
idx = layer_count - target_layer + 1
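# e.g., with layer_count = 4, LAYER = 1 maps to idx = 4: the first layer's
# gradient is the last entry appended during backprop.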
G = as.matrix(grads_W[idx])

r = as.integer($R)
c = as.integer($C)
eps = as.double($EPS)

g_analytic = as.double(G[r,c])

# ---- helper: build W_list with one layer replaced
replace_layer = function(List[Unknown] W_list, Integer k, Matrix[Double] Wnew)
  return(List[Unknown] W_out)
{
  W_out = list()
  for(i in 1:length(W_list)) {
    if(i == k)
      W_out = append(W_out, list(Wnew))
    else
      W_out = append(W_out, list(as.matrix(W_list[i])))
  }
}

# ---- finite difference: perturb W[target_layer][r,c] by +/- eps
Wk = as.matrix(W_list0[target_layer])
nr = nrow(Wk); nc = ncol(Wk)

# sparse perturb matrix with eps at (r,c)
ri = matrix(as.double(r), rows=1, cols=1)
ci = matrix(as.double(c), rows=1, cols=1)
vi = matrix(eps, rows=1, cols=1)
D = table(ri, ci, vi, nr, nc)
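# D is an nr x nc matrix that is all zeros except for the value eps at (r,c),
# built sparsely via table() instead of materializing a dense zero matrix.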

W_plus = Wk + D
W_minus = Wk - D

W_list_plus = replace_layer(W_list0, target_layer, W_plus)
W_list_minus = replace_layer(W_list0, target_layer, W_minus)

[actP, primesP, YhatP, E_plus] = ae::forward_pass_layers(X_pre, W_list_plus, b_list0, X_pre)
[actM, primesM, YhatM, E_minus] = ae::forward_pass_layers(X_pre, W_list_minus, b_list0, X_pre)

obj_plus = ae::obj(E_plus)
obj_minus = ae::obj(E_minus)

g_numeric = (obj_plus - obj_minus) / (2 * eps)
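# Central difference: (J(w+eps) - J(w-eps)) / (2*eps) approximates dJ/dw
# with O(eps^2) truncation error, vs. O(eps) for a one-sided difference.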

# ---- report
den = max(1e-12, abs(g_numeric) + abs(g_analytic))
rel_err = abs(g_numeric - g_analytic) / den
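# Symmetric relative error; as a rule of thumb (an assumption, not enforced
# here), rel_err below roughly 1e-6 with EPS=1e-4 in double precision
# indicates the analytic gradient matches the numeric one.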

print("GRADCHECK layer=" + target_layer + " r=" + r + " c=" + c)
print(" analytic = " + g_analytic)
print(" numeric = " + g_numeric)
print(" rel_err = " + rel_err)
110 changes: 110 additions & 0 deletions scripts/builtin/autoencoderGeneralized.dml
@@ -0,0 +1,110 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
#-------------------------------------------------------------
# Apache SystemDS - autoencoder wrapper
# Prints stable BEFORE/AFTER objective lines for parsing
# and optionally writes outputs (skips if /dev/null).
#-------------------------------------------------------------
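# Example invocation (hypothetical paths/values; the nvargs match the
# $-variables read below, with "..." left for options whose legal values
# depend on m_autoencoder):
#   systemds autoencoderGeneralized.dml -nvargs X=data/X.csv H1=16 H2=8 H3=0 \
#     EPOCH=50 FULL_OBJ=TRUE BATCH=256 STEP=1e-5 DECAY=0.95 MOMENTUM=0.9 \
#     METHOD=... MODE=... UTYPE=... FREQ=... WORKERS=4 SCHEME=... NBATCHES=... \
#     MODELAVG=FALSE W1_out=/dev/null Wlast_out=/dev/null hidden_out=/dev/null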
source("scripts/builtin/autoencoder_2layer.dml") as ae

# --------- read inputs ---------
X = read($X)

# Build encoder hidden layer list
hidden_layers = list(as.integer($H1))
if(as.integer($H2) > 0)
hidden_layers = append(hidden_layers, list(as.integer($H2)))
if(as.integer($H3) > 0)
hidden_layers = append(hidden_layers, list(as.integer($H3)))

# --------- compute objective BEFORE training ---------
# Computed on a standardized + permuted copy, consistent with training.
# order_rand fixes the permutation so that the pre-training objective and
# training see the same row order.
n = nrow(X)
order_rand = Rand(rows=n, cols=1, min=0, max=1, pdf="uniform")
permut = table(
seq(1, n, 1),
order(target=order_rand, by=1, index.return=TRUE),
n, n
)
X_pre = permut %*% X

means = colSums(X_pre) / n
stds = sqrt((colSums(X_pre^2)/n - means*means) * n/(n-1)) + 1e-17
X_pre = (X_pre - means) / stds

# Initialize a model once with the same initializer used by training
layer_sizes = ae::build_layer_sizes(ncol(X_pre), hidden_layers)
[W_list0, b_list0, updW0, updb0] = ae::init_layers(layer_sizes)

# Forward + objective
[act0, primes0, Yhat0, E0] = ae::forward_pass_layers(X_pre, W_list0, b_list0, X_pre)
obj_before = ae::obj(E0)
print("WRAPPER FULL OBJ BEFORE TRAIN = " + obj_before)

# --------- train ---------
# IMPORTANT: pass order_rand so training uses the same permutation as X_pre
[model, hidden] = ae::m_autoencoder(
X=X,
hidden_layers=hidden_layers,
max_epochs=as.integer($EPOCH),
full_obj=as.boolean($FULL_OBJ),
batch_size=as.integer($BATCH),
step=as.double($STEP),
decay=as.double($DECAY),
mu=as.double($MOMENTUM),
method=$METHOD,
mode=$MODE,
utype=$UTYPE,
freq=$FREQ,
k=as.integer($WORKERS),
scheme=$SCHEME,
nbatches=as.integer($NBATCHES),
modelAvg=as.boolean($MODELAVG),
order_rand=order_rand
)
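# 'model' packs all trained parameters; 'hidden' is (presumably) the
# innermost-layer encoding of X returned by training, written out below
# only if requested.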

# --------- compute objective AFTER training on SAME X_pre ---------
encoder_layers = length(hidden_layers)
layer_count = 2 * encoder_layers
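# e.g., H1=16, H2=8 -> encoder_layers = 2 and layer_count = 4
# (2 encoder layers plus a mirrored 2-layer decoder)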

W_list1 = list()
b_list1 = list()

# model layout in generalized code path is:
# [W1..WL, b1..bL, updW1..updWL, updb1..updbL]
for(i in 1:layer_count)
W_list1 = append(W_list1, list(as.matrix(model[i])))
for(i in 1:layer_count)
b_list1 = append(b_list1, list(as.matrix(model[layer_count + i])))
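# e.g., with layer_count = 4: W1..W4 sit in model[1..4], b1..b4 in model[5..8]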

[act1, primes1, Yhat1, E1] = ae::forward_pass_layers(X_pre, W_list1, b_list1, X_pre)
obj_after = ae::obj(E1)
print("WRAPPER FULL OBJ AFTER TRAIN = " + obj_after)

# --------- optional outputs (skipped if the target is /dev/null) ---------
W1_out = as.matrix(model[1])
Wlast_out = as.matrix(model[layer_count])
hidden_out = hidden

if($W1_out != "/dev/null") write(W1_out, $W1_out)
if($Wlast_out != "/dev/null") write(Wlast_out, $Wlast_out)
if($hidden_out != "/dev/null") write(hidden_out, $hidden_out)