From bba157948563213e9dba0c008ac3c12b2b2ef6a9 Mon Sep 17 00:00:00 2001
From: Michael Melesse <micmelesse@gmail.com>
Date: Tue, 1 Nov 2022 17:24:35 +0000
Subject: [PATCH] remove scripts

---
 scripts/amd/backtrace.sh            |  14 ---
 scripts/amd/build.sh                |   7 --
 scripts/amd/cache_print.sh          |  13 ---
 scripts/amd/check_llvm_src.sh       |   3 -
 scripts/amd/clean.sh                |  28 ------
 scripts/amd/collect_rocm_kernels.sh |  11 ---
 scripts/amd/debug.sh                |  15 ---
 scripts/amd/delete_hip_files.sh     |   2 -
 scripts/amd/deps.sh                 |   3 -
 scripts/amd/docker_build.sh         |  16 ---
 scripts/amd/docker_run.sh           |  29 ------
 scripts/amd/find_lib.sh             |   2 -
 scripts/amd/git_config_user.sh      |   7 --
 scripts/amd/git_submodule_add.sh    |   1 -
 scripts/amd/git_submodule_rm.sh     |  10 --
 scripts/amd/git_submodule_update.sh |   6 --
 scripts/amd/grep_for_symbol.sh      |  18 ----
 scripts/amd/hipify.sh               |   1 -
 scripts/amd/lld.sh                  |   1 -
 scripts/amd/post.sh                 |   2 -
 scripts/amd/run.sh                  |  18 ----
 scripts/amd/test.sh                 | 147 ----------------------------
 scripts/amd/test_fptrunc.py         |  62 ------------
 23 files changed, 416 deletions(-)
 delete mode 100644 scripts/amd/backtrace.sh
 delete mode 100755 scripts/amd/build.sh
 delete mode 100755 scripts/amd/cache_print.sh
 delete mode 100644 scripts/amd/check_llvm_src.sh
 delete mode 100644 scripts/amd/clean.sh
 delete mode 100644 scripts/amd/collect_rocm_kernels.sh
 delete mode 100644 scripts/amd/debug.sh
 delete mode 100644 scripts/amd/delete_hip_files.sh
 delete mode 100644 scripts/amd/deps.sh
 delete mode 100644 scripts/amd/docker_build.sh
 delete mode 100755 scripts/amd/docker_run.sh
 delete mode 100644 scripts/amd/find_lib.sh
 delete mode 100644 scripts/amd/git_config_user.sh
 delete mode 100644 scripts/amd/git_submodule_add.sh
 delete mode 100644 scripts/amd/git_submodule_rm.sh
 delete mode 100644 scripts/amd/git_submodule_update.sh
 delete mode 100644 scripts/amd/grep_for_symbol.sh
 delete mode 100644 scripts/amd/hipify.sh
 delete mode 100644 scripts/amd/lld.sh
 delete mode 100644 scripts/amd/post.sh
 delete mode 100644 scripts/amd/run.sh
 delete mode 100755 scripts/amd/test.sh
 delete mode 100644 scripts/amd/test_fptrunc.py

diff --git a/scripts/amd/backtrace.sh b/scripts/amd/backtrace.sh
deleted file mode 100644
index df8595daf..000000000
--- a/scripts/amd/backtrace.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-sudo apt install gdb -y
-
-# export AMD_OCL_WAIT_COMMAND=1
-# export AMD_LOG_LEVEL=3
-# export HIP_LAUNCH_BLOCKING=1
-
-
-gdb -ex "set pagination off" \
-    -ex "file python" \
-    -ex 'run -m pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_bin_op[int32-uint32-+]"' \
-    -ex "backtrace" \
-    -ex "set confirm off" \
-    -ex "q" \
-    2>&1 | tee /dockerx/pytorch/test_core_gdb.log
diff --git a/scripts/amd/build.sh b/scripts/amd/build.sh
deleted file mode 100755
index 962e46775..000000000
--- a/scripts/amd/build.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-set -e
-cd python
-pip uninstall -y triton
-# export TRITON_USE_ROCM=ON
-
-export TRITON_ROCM_DEBUG=ON
-pip install --verbose -e .
\ No newline at end of file
diff --git a/scripts/amd/cache_print.sh b/scripts/amd/cache_print.sh
deleted file mode 100755
index d70d5e120..000000000
--- a/scripts/amd/cache_print.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-CACHED_FILES=$(find /root/.triton/cache/ -type f -name "*.*")
-
-for file in ${CACHED_FILES[@]}; do
-	echo "$file"
-	if [[ $file == *.so ]]; then
-		echo "Skipping printing .so file"
-	else
-		sed -i -e '$a\' $file
-		cat $file
-	fi
-done
diff --git a/scripts/amd/check_llvm_src.sh b/scripts/amd/check_llvm_src.sh
deleted file mode 100644
index a54e58f33..000000000
--- a/scripts/amd/check_llvm_src.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-shopt -s extglob
-/opt/rocm/llvm/bin/llc -mcpu=gfx908 triton_rocm_kernels/*+([0-9]).ll
-# /opt/rocm/llvm/bin/llc -mcpu=gfx908 triton_rocm_kernels/*_before_verify.ll
\ No newline at end of file
diff --git a/scripts/amd/clean.sh b/scripts/amd/clean.sh
deleted file mode 100644
index f4c341ae6..000000000
--- a/scripts/amd/clean.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-set -x
-rm -rf core
-rm -rf ptx.hip
-rm -rf python/build/
-rm -rf python/test/__pycache__/
-rm -rf python/triton.egg-info/
-rm -rf python/triton/_C/libtriton.so
-rm -rf python/triton/__pycache__/
-rm -rf python/triton/ops/__pycache__/
-rm -rf python/triton/ops/blocksparse/__pycache__/
-rm -rf *.isa
-rm -rf *.gcn
-rm -rf *.ptx
-rm -rf *.ll
-rm -rf *.s
-rm -rf *.o
-rm -rf *.hsaco
-rm -rf *.ttir
-sh scripts/amd/delete_hip_files.sh
-rm -rf triton_rocm_kernels
-rm -rf /tmp/*.ll
-rm -rf /tmp/*.gcn
-rm -rf /tmp/*.hsaco
-rm -rf /tmp/*.o
-rm -rf /tmp/*.ttir
-rm -rf /tmp/*.s
-rm -rf build
-rm -rf /root/.triton/cache
\ No newline at end of file
diff --git a/scripts/amd/collect_rocm_kernels.sh b/scripts/amd/collect_rocm_kernels.sh
deleted file mode 100644
index 331558155..000000000
--- a/scripts/amd/collect_rocm_kernels.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-# COPY kernels
-DIRNAME=triton_rocm_kernels
-rm -rf $DIRNAME
-mkdir $DIRNAME
-mv /tmp/*.ttir $DIRNAME
-mv /tmp/*.ll $DIRNAME
-mv /tmp/*.gcn $DIRNAME
-mv /tmp/*.o $DIRNAME
-mv /tmp/*.hsaco $DIRNAME
-mv /tmp/*.s $DIRNAME
-chmod -R 777 $DIRNAME
diff --git a/scripts/amd/debug.sh b/scripts/amd/debug.sh
deleted file mode 100644
index aa6b2a141..000000000
--- a/scripts/amd/debug.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-sudo apt install gdb -y
-
-# export AMD_OCL_WAIT_COMMAND=1
-# export AMD_LOG_LEVEL=3
-# export HIP_LAUNCH_BLOCKING=1
-
-gdb -ex "file python" \
-    -ex 'run -m pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_empty_kernel[float32]"' \
-    -ex "set pagination off" \
-    -ex "set confirm off" \
-    -ex "break _exit" \
-    -ex "commands"
-    -ex "run"
-    -ex 'end' \
-    2>&1 | tee /dockerx/pytorch/test_core_gdb.log
diff --git a/scripts/amd/delete_hip_files.sh b/scripts/amd/delete_hip_files.sh
deleted file mode 100644
index b6b5abe5c..000000000
--- a/scripts/amd/delete_hip_files.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-# find . -name '*hip.h' -delete
-find . -name '*_hip.*' -delete
\ No newline at end of file
diff --git a/scripts/amd/deps.sh b/scripts/amd/deps.sh
deleted file mode 100644
index d7f2970cc..000000000
--- a/scripts/amd/deps.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-sudo apt update
-sudo apt install libtinfo-dev gdb 
-# sudo apt install llvm-11 # install on cuda
\ No newline at end of file
diff --git a/scripts/amd/docker_build.sh b/scripts/amd/docker_build.sh
deleted file mode 100644
index 0395ac0e2..000000000
--- a/scripts/amd/docker_build.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-# print every command
-set -o xtrace
-
-# set path
-# DOCKERFILE_PATH=scripts/docker/Dockerfile.triton_rocm
-# DOCKERFILE_PATH=scripts/docker/Dockerfile.triton_cuda
-# DOCKERFILE_PATH=triton_rocm_all_archs.Dockerfile
-DOCKERFILE_PATH=triton_rocm_20-52.Dockerfile
-
-# get tag
-DOCKERFILE_NAME=$(basename $DOCKERFILE_PATH)
-DOCKERIMAGE_NAME=$(echo "$DOCKERFILE_NAME" | cut -f -1 -d '.')
-echo $DOCKERIMAGE_NAME
-
-# build docker
-docker build --build-arg CACHEBUST=$(date +%s) -f $DOCKERFILE_PATH -t $DOCKERIMAGE_NAME .
diff --git a/scripts/amd/docker_run.sh b/scripts/amd/docker_run.sh
deleted file mode 100755
index 63a34fa9c..000000000
--- a/scripts/amd/docker_run.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-set -o xtrace
-
-alias drun='sudo docker run -it --rm --network=host --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined'
-
-# DEVICES="--gpus all"
-DEVICES="--device=/dev/kfd --device=/dev/dri"
-
-MEMORY="--ipc=host --shm-size 16G"
-
-VOLUMES="-v $HOME/dockerx:/dockerx -v /data:/data"
-
-# WORK_DIR='/root/$(basename $(pwd))'
-WORK_DIR="/dockerx/$(basename $(pwd))"
-
-# IMAGE_NAME=nvcr.io/nvidia/pytorch:21.08-py3
-# IMAGE_NAME=rocm/pytorch # latest doesnot work
-# IMAGE_NAME=rocm/pytorch:rocm4.3.1_ubuntu18.04_py3.6_pytorch_1.10.0
-IMAGE_NAME=triton_rocm_20-52 # build this docker before running
-
-CONTAINER_NAME=triton
-
-# start new container
-CONTAINER_ID=$(drun -d -w $WORK_DIR --name $CONTAINER_NAME $MEMORY $VOLUMES $DEVICES $IMAGE_NAME)
-echo "CONTAINER_ID: $CONTAINER_ID"
-# docker cp . $CONTAINER_ID:$WORK_DIR
-# docker exec $CONTAINER_ID bash -c "bash scripts/amd/run.sh"
-docker attach $CONTAINER_ID
-docker stop $CONTAINER_ID
-docker rm $CONTAINER_ID
diff --git a/scripts/amd/find_lib.sh b/scripts/amd/find_lib.sh
deleted file mode 100644
index 5c1c3240e..000000000
--- a/scripts/amd/find_lib.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-LIB_NAME=libtinfow
-ldconfig -p | grep $LIB_NAME
\ No newline at end of file
diff --git a/scripts/amd/git_config_user.sh b/scripts/amd/git_config_user.sh
deleted file mode 100644
index d1063202e..000000000
--- a/scripts/amd/git_config_user.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-# use --global flag if you want to set it for whole machine
-git config user.name "Michael Melesse"
-git config user.email "micmelesse@gmail.com"
-
-# unset with
-# git config --global --unset-all user.name
-# git config --global --unset-all user.email
\ No newline at end of file
diff --git a/scripts/amd/git_submodule_add.sh b/scripts/amd/git_submodule_add.sh
deleted file mode 100644
index 07c971c47..000000000
--- a/scripts/amd/git_submodule_add.sh
+++ /dev/null
@@ -1 +0,0 @@
-git submodule add https://github.com/ROCmSoftwarePlatform/hipify-torch third_party/hipify-torch
\ No newline at end of file
diff --git a/scripts/amd/git_submodule_rm.sh b/scripts/amd/git_submodule_rm.sh
deleted file mode 100644
index e42d6bd4a..000000000
--- a/scripts/amd/git_submodule_rm.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-SUBMODULE=third_party/hipify-torch
-
-# Remove the submodule entry from .git/config
-git submodule deinit -f $SUBMODULE
-
-# Remove the submodule directory from the superproject's .git/modules directory
-rm -rf .git/modules/$SUBMODULE
-
-# Remove the entry in .gitmodules and remove the submodule directory located at path/to/submodule
-git rm -f $SUBMODULE
\ No newline at end of file
diff --git a/scripts/amd/git_submodule_update.sh b/scripts/amd/git_submodule_update.sh
deleted file mode 100644
index 431c18606..000000000
--- a/scripts/amd/git_submodule_update.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-# if you are updating an existing checkout
-git submodule sync
-git submodule update --init --recursive
-
-# if you want to push every to tip
-# git submodule update --init --recursive --remote
diff --git a/scripts/amd/grep_for_symbol.sh b/scripts/amd/grep_for_symbol.sh
deleted file mode 100644
index aae3d88b0..000000000
--- a/scripts/amd/grep_for_symbol.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-# SYMBOL=_ZN4llvm11PassBuilder17OptimizationLevel2O0E
-# SYMBOL=_ZN4llvm11DDGAnalysis3KeyE
-# SYMBOL=_ZN4llvm26UnifyFunctionExitNodesPass3runERNS_8FunctionERNS_15AnalysisManagerIS1_JEEE
-# SYMBOL=_ZN4llvm12LoopFusePass3runERNS_8FunctionERNS_15AnalysisManagerIS1_JEEE
-# SYMBOL=_ZN4llvm30moveInstructionsToTheBeginningERNS_10BasicBlockES1_RNS_13DominatorTreeERKNS_17PostDominatorTreeERNS_14DependenceInfoE
-# SYMBOL=_ZN4llvm17LoopExtractorPass3runERNS_6ModuleERNS_15AnalysisManagerIS1_JEEE
-# SYMBOL=_ZN4llvm17ObjCARCExpandPass3runERNS_8FunctionERNS_15AnalysisManagerIS1_JEEE
-# SYMBOL=_ZN4llvm13CoroSplitPass3runERNS_13LazyCallGraph3SCCERNS_15AnalysisManagerIS2_JRS1_EEES5_RNS_17CGSCCUpdateResultE
-SYMBOL=_ZN4llvm20SyntheticCountsUtilsIPKNS_9CallGraphEE9propagateERKS3_NS_12function_refIFNS_8OptionalINS_12ScaledNumberImEEEEPKNS_13CallGraphNodeERKSt4pairINS8_INS_14WeakTrackingVHEEEPSC_EEEENS7_IFvSE_SA_EEE
-for lib in $(find /tmp/clang+llvm-13.0.0-x86_64-linux-gnu-ubuntu-16.04/ -name \*.a); do
-    symbols=$(nm $lib | grep $SYMBOL | grep -v " U ")
-    
-    if [ "${#symbols}" -gt "0" ]; then
-        echo $lib
-        echo $symbols
-    fi
-
-done
diff --git a/scripts/amd/hipify.sh b/scripts/amd/hipify.sh
deleted file mode 100644
index 474659456..000000000
--- a/scripts/amd/hipify.sh
+++ /dev/null
@@ -1 +0,0 @@
-PYTHONDONTWRITEBYTECODE=1 python3 third_party/hipify-torch/hipify_cli.py --project-directory .
diff --git a/scripts/amd/lld.sh b/scripts/amd/lld.sh
deleted file mode 100644
index f3542efa8..000000000
--- a/scripts/amd/lld.sh
+++ /dev/null
@@ -1 +0,0 @@
-/opt/rocm/llvm/bin/ld.lld -flavor gnu -shared _empty.o -o _empty.hsaco
diff --git a/scripts/amd/post.sh b/scripts/amd/post.sh
deleted file mode 100644
index 8c40f18d0..000000000
--- a/scripts/amd/post.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-bash scripts/amd/collect_rocm_kernels.sh
-bash scripts/amd/check_llvm_src.sh
\ No newline at end of file
diff --git a/scripts/amd/run.sh b/scripts/amd/run.sh
deleted file mode 100644
index 015749c54..000000000
--- a/scripts/amd/run.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-clear
-
-ROOT_DIR=$(pwd)
-LOG_DIR=$ROOT_DIR/log_$(git rev-parse --symbolic-full-name --abbrev-ref HEAD)
-rm -rf $LOG_DIR
-mkdir -p $LOG_DIR
-chmod -R 777 $LOG_DIR
-
-
-bash scripts/amd/clean.sh
-# bash scripts/amd/deps.sh
-bash scripts/amd/build.sh
-bash scripts/amd/test.sh  2>&1 |tee $LOG_DIR/test.log
-# bash scripts/amd/debug.sh
-# bash scripts/amd/backtrace.sh 2>&1 |tee $LOG_DIR/backtrace.log
-
-# bash scripts/amd/cache_print.sh  2>&1 |tee $LOG_DIR/cache.log
-# bash scripts/amd/post.sh # dont double call
\ No newline at end of file
diff --git a/scripts/amd/test.sh b/scripts/amd/test.sh
deleted file mode 100755
index 0d0d660ee..000000000
--- a/scripts/amd/test.sh
+++ /dev/null
@@ -1,147 +0,0 @@
-# clear
-rm -rf triton_rocm_kernels
-
-# export TRITON_LIBHIP=/opt/rocm/lib/libamdhip64.so
-
-# export AMD_OCL_WAIT_COMMAND=1
-# export AMD_LOG_LEVEL=3
-# export HIP_LAUNCH_BLOCKING=1
-
-# remove cache to avoid segfaults
-# TODO: inform triton dev the cache cause segfault
-rm -rf /tmp/triton
-
-# pytest python/test
-# pytest python/test/test_blocksparse.py
-
-# pytest --verbose python/test/test_conv.py
-# pytest --verbose python/test/test_blocksparse.py::test_matmul[sdd-False-False-16-float16]
-# pytest --verbose python/test/test_blocksparse.py::test_attention_fwd_bwd
-# python python/test/test_conv.py
-
-# gdb -ex "set breakpoint pending on" \
-#     -ex 'break add_passes_to_emit_bin' \
-#     --args python python/test/test_add.py
-
-# python python/test/test_empty.py
-# -ex 'ignore 1 472' \
-
-pytest -rfs --verbose python/test/unit/language/test_core.py 2>&1 | tee /dockerx/triton/test_core.log
-# pytest --verbose python/test/unit/language/test_core.py::test_empty_kernel[float32] 2>&1 | tee /dockerx/triton/test_empty_kernel.log
-# pytest --verbose python/test/unit/language/test_core.py::test_bin_op[int32-uint32-+] 2>&1 | tee /dockerx/triton/test_bin_op.log
-# pytest --verbose python/test/unit/language/test_core.py::test_atomic_rmw 2>&1 | tee /dockerx/triton/test_atomic_rmw.log
-# pytest --verbose python/test/unit/language/test_core.py::test_atomic_rmw[add-float16-all_neg] 2>&1 | tee /dockerx/triton/test_atomic_rmw.log
-#  pytest --verbose "python/test/unit/language/test_core.py::test_reduce1d" 2>&1 | tee /dockerx/triton/test_reduce1d.log
-# pytest --verbose "python/test/unit/language/test_core.py::test_cast[float32-float32-False]" 2>&1 | tee /dockerx/triton/test_cast.log
-# pytest --verbose "python/test/unit/language/test_core.py::test_load_cache_modifier" 2>&1 | tee /dockerx/triton/test_vectorization.log
-
-# mismatch
-# pytest --verbose "python/test/unit/language/test_core.py::test_bin_op[int8-float16-%]"" 2>&1 | tee /dockerx/triton/test_bin_op.log
-# pytest --verbose "python/test/unit/language/test_core.py::test_bitwise_op[int32-int32-&1]" 2>&1 | tee /dockerx/triton/test_bitwise_op.log
-# pytest --verbose "python/test/unit/language/test_core.py::test_shift_op[int32-int32->>]" 2>&1 | tee /dockerx/triton/test_shift_op.log
-# pytest --verbose "python/test/unit/language/test_core.py::test_compare_op[float32-float32->-nan-real]" 2>&1 | tee /dockerx/triton/test_compare_op.log
-# pytest --verbose "python/test/unit/language/test_core.py::test_cast[float32-float32-False]" 2>&1 | tee /dockerx/triton/test_compare_op.log
-
-# pytest --capture=tee-sys --verbose  python/test/regression/test_performance.py | tee /dockerx/triton/test_performance.log
-# pytest --capture=tee-sys --verbose  python/test/regression/test_performance.py::test_matmul | tee /dockerx/triton/test_performance_matmul.log
-# pytest --capture=tee-sys --verbose  python/test/regression/test_performance.py::test_elementwise | tee /dockerx/triton/test_performance_elementwise.log
-
-# pytest --capture=tee-sys --verbose  python/test/regression/test_performance.py::test_matmul[256-256-256]
-
-# pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_empty_kernel[float32]
-# pytest --verbose python/test/unit/language/test_core.py::test_load_and_store_op[float32-2]
-# pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_load_and_store_op_with_mask
-# pytest --verbose python/test/unit/language/test_core.py::test_program_id[float32]
-# pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_num_programs[float32]
-# pytest --verbose python/test/unit/language/test_core.py::test_unary_op
-# pytest --verbose python/test/unit/language/test_core.py::test_bin_op
-# pytest --verbose "python/test/unit/language/test_core.py::test_dot"
-# pytest --verbose python/test/unit/language/test_core.py::test_cast
-# pytest --verbose python/test/unit/language/test_core.py::test_reduce1d
-# pytest --verbose python/test/unit/language/test_core.py::test_reduce2d
-# pytest --verbose python/test/unit/language/test_core.py::test_math_op
-# pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_atomic_rmw
-# pytest --verbose python/test/unit/operators/test_blocksparse.py::test_matmul
-# pytest --verbose python/test/unit/operators/test_blocksparse.py::test_matmul[DTYPE0-16-False-False-dds]
-# pytest --verbose python/test/unit/operators/test_blocksparse.py::test_matmul[DTYPE0-64-False-False-dds]
-# pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_matmul
-
-# pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_load_and_store_op_with_mask
-# pytest  --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_masked_load_shared_memory"
-# pytest --verbose "python/test/unit/operators/test_blocksparse.py::test_softmax[DTYPE0-256-16]"
-# pytest --verbose "python/test/unit/operators/test_blocksparse.py::test_softmax" #|& tee /dockerx/triton/test_softmax.log
-
-
-# pytest --verbose "python/test/unit/operators/test_blocksparse.py::test_softmax[DTYPE0-1024-16]" # PASSED                                                                                [ 29%]
-# pytest --verbose "python/test/unit/operators/test_blocksparse.py::test_softmax[DTYPE0-1024-32]" # FAILED
-# pytest --verbose python/test/unit/language/test_core.py::test_permute
-# pytest --verbose python/test/unit/language/test_core.py::test_load_cache_modifier
-
-# pytest --verbose python/test/unit/language/test_core.py::test_math_op[log]
-# pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_load_and_store_op[float64]
-# pytest --verbose "python/test/unit/language/test_core.py::test_bin_op[int8-int64- x % y]"
-# pytest --verbose "python/test/unit/language/test_core.py::test_dot[none]" |& tee /dockerx/triton/test_dot_none.log
-# pytest --verbose "python/test/unit/language/test_core.py::test_dot[add-rows]"
-# pytest --verbose "python/test/unit/language/test_core.py::test_dot[add-cols]"
-# pytest --verbose "python/test/unit/language/test_core.py::test_cast[float32-float16-False]"
-
-# pytest --verbose python/test/unit/operators/test_blocksparse.py::test_matmul[DTYPE0-32-False-False-sdd]
-# pytest --capture=tee-sys --verbose python/test/unit/operators/test_blocksparse.py::test_softmax[DTYPE0-256-32]
-
-# pytest --verbose python/test/unit/operators/test_blocksparse.py
-# pytest --verbose python/test/unit/operators/test_blocksparse.py::test_matmul[DTYPE0-32-False-False-sdd]
-# pytest --verbose scripts/amd/test_fptrunc.py
-# pytest --verbose scripts/amd/test_fptrunc.py::test_fptrunc[float32-float32-False]
-# pytest --verbose "python/test/unit/language/test_core.py::test_cast"
-# pytest --verbose "python/test/unit/language/test_core.py::test_cast[float32-float16-False]"
-# pytest --verbose "python/test/unit/language/test_core.py::test_cast[float32-bfloat16-False]"
-# python python/test/unit/language/test_core.py
-
-# pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_empty_kernel
-
-# pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_bin_op[int8-int64- x % y]"
-# pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_bin_op[int8-float32- x % y]"
-# pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_bin_op[int8-float16- x % y]"
-# pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_bin_op[float32-float64- x % y]"
-# pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_math_op[exp]"
-# pytest --verbose "python/test/unit/operators/test_blocksparse.py"
-# pytest --capture=tee-sys --verbose "python/test/unit/operators/test_blocksparse.py::test_matmul[sdd-False-False-16-float16]"
-
-# pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_arange"
-
-# pytest --verbose "python/test/unit/language/test_core.py::test_masked_load_shared_memory"
-# pytest --verbose "python/test/unit/language/test_core.py::test_dot_without_load"
-# pytest --verbose "python/test/unit/language/test_core.py::test_fmadot"
-
-# FAILING TESTS
-# pytest --verbose "python/test/unit/language/test_core.py::test_bin_op[int8-float16- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int8-float32- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int8-float64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int16-float16- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int16-float32- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int16-float64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int32-float16- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int32-float32- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int32-float64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int64-float16- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int64-float32- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[int64-float64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float16-int8- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float16-int16- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float16-int32- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float16-int64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float16-float64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float32-int8- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float32-int16- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float32-int32- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float32-int64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float32-float64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float64-int8- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float64-int16- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float64-int32- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float64-int64- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float64-float16- x % y]" \
-#     "python/test/unit/language/test_core.py::test_bin_op[float64-float32- x % y]"
-
-# do post test steps
-# bash scripts/amd/post.sh # it should be run in the run script
diff --git a/scripts/amd/test_fptrunc.py b/scripts/amd/test_fptrunc.py
deleted file mode 100644
index 932ab2942..000000000
--- a/scripts/amd/test_fptrunc.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import torch
-import triton
-import triton.language as tl
-import pytest
-
-cvt = {
-    'bool': torch.bool,
-    'int8': torch.int8,
-    'int16': torch.int16,
-    'int32': torch.int32,
-    'int64': torch.int64,
-    'bfloat16': torch.bfloat16,
-    'float16': torch.float16,
-    'float32': torch.float32,
-    'float64': torch.float64,
-}
-
-int_dtypes = ['int8', 'int16', 'int32', 'int64']
-float_dtypes = ['float16', 'float32', 'float64']
-dtypes = int_dtypes + float_dtypes
-
-
-@pytest.mark.parametrize("dtype_x, dtype_z, bitcast", [
-    (dtype_x, dtype_z, False)
-    for dtype_x in dtypes
-    for dtype_z in dtypes
-])
-def test_fptrunc(dtype_x, dtype_z, bitcast, device='cuda'):
-    SIZE = 256
-    # define the kernel / launch-grid
-    @triton.jit
-    def kernel(Z, X, **meta):
-        off = tl.arange(0, meta['SIZE'])
-        x = tl.load(X + off)
-        tl.store(Z + off, x)
-    # inputs
-    x = triton.testing.random(SIZE, dtype=cvt[dtype_x], device=device)
-
-    # reference result
-    z_ref = x.type(dtype=cvt[dtype_z])
-
-    # triton result
-    z_tri = torch.zeros_like(x, dtype=cvt[dtype_z])
-
-    # triton.testing.assert_almost_equal(z_ref, z_tri)
-
-    print("before kernel")
-    # run load and store kernel
-    kernel[(1, )](z_tri, x, SIZE=SIZE, num_warps=1)
-    print("after kernel")
-
-    # print("x:", x)
-    # print("z_ref:", z_ref)
-    # print("z_tri:", z_tri)
-    # compare
-    print("before compare")
-    triton.testing.assert_almost_equal(z_ref, z_tri)
-    print("after compare")
-
-
-if __name__ == '__main__':
-    test_fptrunc()