from sklearn import tree
from sklearn import ensemble
import numpy as np

def gmean(a, axis=0, dtype=None):
    if not isinstance(a, np.ndarray):  # if not an ndarray object attempt to convert it
        log_a = np.log(np.array(a, dtype=dtype))
    elif dtype:  # Must change the default dtype allowing array type
        if isinstance(a,np.ma.MaskedArray):
            log_a = np.log(np.ma.asarray(a, dtype=dtype))
        else:
            log_a = np.log(np.asarray(a, dtype=dtype))
    else:
        log_a = np.log(a)
    return np.exp(log_a.mean(axis=axis))

def nrmse(y_ground, y):
    N = y.size
    rmsd = np.sqrt(np.sum((y_ground - y)**2)/N)
    return rmsd/(np.max(y_ground) - np.min(y_ground))

def train_model(X, Y, profiles, metric):
    #Shuffle
    p = np.random.permutation(X.shape[0])
    X = X[p,:]
    Y = Y[p,:]
    #Normalize
    Ymax = np.max(Y)
    Y = Y/Ymax
    #Train the model
    cut = int(0.9*X.shape[0])
    nrmses = {}
    for depth in range(1,10):
        clf = ensemble.RandomForestRegressor(5, max_depth=4).fit(X[:cut,:], Y[:cut,:])
        t = np.argmin(clf.predict(X[cut:,:]), axis = 1)
        y = np.array([Y[cut+i,t[i]] for i in range(t.size)])
        y_ground = np.min(Y[cut:,:], axis=1)
        # for i in range(t.size):
        #     print X[cut+i,:], y[i], y_ground[i]
        nrmses[clf] = nrmse(y_ground, y)
        print depth, nrmses[clf]

    clf = min(nrmses, key=nrmses.get)

    return clf