diff --git a/src/benchmarks/rodinia-srad/julia/srad.jl b/src/benchmarks/rodinia-srad/julia/srad.jl
index c9002f468067d8b5c5ec8b5688b233a52c8026fb..72b77890f1c18b2aeb560393d9332abcd96cd7c8 100755
--- a/src/benchmarks/rodinia-srad/julia/srad.jl
+++ b/src/benchmarks/rodinia-srad/julia/srad.jl
@@ -50,24 +50,24 @@ function main(args)
     jW[1] = 1
     jE[cols] = cols
 
-    dN::Matrix{Float32} = zeros(rows,cols)
-    dS::Matrix{Float32} = zeros(rows,cols)
-    dW::Matrix{Float32} = zeros(rows,cols)
-    dE::Matrix{Float32} = zeros(rows,cols)
+    dN::Matrix{Float32} = zeros(cols,rows)
+    dS::Matrix{Float32} = zeros(cols,rows)
+    dW::Matrix{Float32} = zeros(cols,rows)
+    dE::Matrix{Float32} = zeros(cols,rows)
 
     println("Randomizing the input matrix")
 
     @ccall srand(7::Cint)::Cvoid
 
-    I::Matrix{Float32} = zeros(rows, cols)
-    for i in 1:size(I, 1)
-        for j in 1:size(I, 2)
+    I::Matrix{Float32} = zeros(cols,rows)
+    for j in 1:size(I, 2)      # second axis of I, which is allocated as zeros(cols,rows)
+        for i in 1:size(I, 1)  # first axis innermost, so writes follow column-major order
             I[i, j] = (@ccall rand()::Cint) / RAND_MAX
         end
     end
 
     J = exp.(I)
-    c::Matrix{Float32} = zeros(rows,cols)
+    c::Matrix{Float32} = zeros(cols,rows)
 
     println("Start the SRAD main loop")
 
@@ -76,8 +76,8 @@ function main(args)
     for iter in 1:niter
         sum = 0
         sum2 = 0
-        for i in r1:r2
-            for j in c1:c2
+        for j in r1:r2
+            for i in c1:c2
                 tmp = J[i + 1,j + 1]
                 sum += tmp
                 sum2 += tmp * tmp
@@ -88,15 +88,15 @@ function main(args)
         varROI::Float32 = (sum2 / size_R) - meanROI * meanROI
         q0sqr::Float32 = varROI / (meanROI * meanROI)
 
-        Threads.@threads for i in 1:size(J,1)
-            for j in 1:size(J,2)
+        Threads.@threads for j in 1:size(J,2)
+            for i in 1:size(J,1)
                 Jc = J[i,j]
 
                 # directional derivates
-                dN[i,j] = J[iN[i],j] - Jc
-                dS[i,j] = J[iS[i],j] - Jc
-                dW[i,j] = J[i,jW[j]] - Jc
-                dE[i,j] = J[i,jE[j]] - Jc
+                dN[i,j] = J[i,iN[j]] - Jc
+                dS[i,j] = J[i,iS[j]] - Jc
+                dW[i,j] = J[jW[i],j] - Jc
+                dE[i,j] = J[jE[i],j] - Jc
 
                 G2 = (dN[i,j] * dN[i,j] + dS[i,j] * dS[i,j] + dW[i,j] * dW[i,j] +
                     dE[i,j] * dE[i,j]) /
@@ -121,13 +121,13 @@ function main(args)
             end
         end
 
-        Threads.@threads for i in 1:size(J,1)
-            for j in 1:size(J,2)
+        Threads.@threads for j in 1:size(J,2)
+            for i in 1:size(J,1)
                 # diffusion coefficient
                 cN = c[i,j]
-                cS = c[iS[i],j]
+                cS = c[i,iS[j]]
                 cW = c[i,j]
-                cE = c[i,jE[j]]
+                cE = c[jE[i],j]
 
                 # divergence (equ 58)
                 D = cN * dN[i,j] + cS * dS[i,j] + cW * dW[i,j] + cE * dE[i,j]
@@ -138,8 +138,8 @@ function main(args)
         end
     end
 
-    for i in 1:size(J,1)
-        for j in 1:size(J,2)
+    for j in 1:size(J,2)
+        for i in 1:size(J,1)
             @printf "%.5f " J[i, j]
         end
         println()