tsne降维可视化

摘要:
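本文给出 t-SNE 降维可视化的 Python 实现(tsne.py)。其中的输入检查代码片段为:if X.dtype != "float64": print "Error: array X should have type float64."; return -1;  #if no_dims.__class__ != "":  # doesn't work yet!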

tsne降维可视化第1张

Python代码:先准备训练样本的数据文件和标签文件 train_X4000.txt、train_y4000.txt,并放在 tsne.py 所在的目录下。(t-SNE 的具体实现参见 Laurens van der Maaten 的主页 http://lvdmaaten.github.io/tsne/ 中的 Python implementation。)

tsne.py代码:(为了使得figure显示数据的标签,代码做了简单修改)

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  tsne.py
#  
# Implementation of t-SNE in Python. The implementation was tested on Python 2.5.1, and it requires a working
# installation of NumPy. The implementation comes with an example on the MNIST dataset. In order to plot the
# results of this example, a working installation of matplotlib is required.
# The example can be run by executing: ipython tsne.py -pylab
#
#
#  Created by Laurens van der Maaten on 20-12-08.
#  Copyright (c) 2008 Tilburg University. All rights reserved.

import numpy as Math
import pylab as Plot
    
def Hbeta(D=Math.array([]), beta=1.0):
    """Compute the entropy and the P-row for one value of the Gaussian precision.

    Parameters
    ----------
    D : ndarray
        Distances from one point to the other points (x2p passes one row of
        its squared-distance matrix with the self-distance removed).
    beta : float or ndarray of size 1
        Precision of the Gaussian kernel; the kernel is exp(-D * beta).

    Returns
    -------
    H : float or ndarray
        Entropy (in nats) of the normalised kernel values. x2p compares it
        against log(perplexity). H takes the shape of ``beta``.
    P : ndarray
        Kernel values divided by their (slightly padded) sum, same shape as D.
    """
    P = Math.exp(-D * beta)
    # Use NumPy's reduction rather than the builtin sum(): the builtin iterates
    # element by element in Python and only reduces the first axis, whereas the
    # entropy term below reduces over every element.
    # The 1e-6 padding keeps the division finite if every kernel value
    # underflows to zero.
    sumP = Math.sum(P) + 1e-6
    H = Math.log(sumP) + beta * Math.sum(D * P) / sumP
    P = P / sumP
    return H, P
    
    
def x2p(X=Math.array([]), tol=1e-5, perplexity=30.0):
    """Binary-search a Gaussian precision per point so every row has the same perplexity.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Data matrix, one point per row.
    tol : float
        The search for a row stops once |H - log(perplexity)| <= tol.
    perplexity : float
        Target perplexity; the target entropy is log(perplexity).

    Returns
    -------
    P : ndarray, shape (n, n)
        Row i holds the conditional probabilities of the other points given
        point i. The diagonal is left at zero.

    Notes
    -----
    Each row gets at most 50 bisection steps; if the tolerance is not reached
    by then, the last candidate is used as is.
    """
    # Initialize some variables
    print("Computing pairwise distances...")
    n = X.shape[0]
    sum_X = Math.sum(Math.square(X), 1)
    # Squared Euclidean distances: |xi|^2 - 2 xi.xj + |xj|^2
    D = Math.add(Math.add(-2 * Math.dot(X, X.T), sum_X).T, sum_X)
    P = Math.zeros((n, n))
    beta = Math.ones((n, 1))
    logU = Math.log(perplexity)

    # Loop over all datapoints
    for i in range(n):

        # Print progress (spacing matches the original comma-separated print)
        if i % 500 == 0:
            print("Computing P-values for point  %s  of  %s ..." % (i, n))

        # Every column except i: a point is not its own neighbour
        others = Math.concatenate((Math.r_[0:i], Math.r_[i+1:n]))

        # Compute the Gaussian kernel and entropy for the current precision
        betamin = -Math.inf
        betamax = Math.inf
        Di = D[i, others]
        (H, thisP) = Hbeta(Di, beta[i])

        # Evaluate whether the perplexity is within tolerance
        Hdiff = H - logU
        tries = 0
        while Math.abs(Hdiff) > tol and tries < 50:

            if Hdiff > 0:
                # Entropy too high: raise the precision. Double it while no
                # upper bound is known, otherwise bisect towards the bound.
                betamin = beta[i].copy()
                if Math.isinf(betamax):
                    beta[i] = beta[i] * 2
                else:
                    beta[i] = (beta[i] + betamax) / 2
            else:
                # Entropy too low: lower the precision symmetrically.
                betamax = beta[i].copy()
                if Math.isinf(betamin):
                    beta[i] = beta[i] / 2
                else:
                    beta[i] = (beta[i] + betamin) / 2

            # Recompute the values
            (H, thisP) = Hbeta(Di, beta[i])
            Hdiff = H - logU
            tries = tries + 1

        # Set the final row of P
        P[i, others] = thisP

    # Report once, after every beta is final (sigma = sqrt(1 / beta))
    print("Mean value of sigma:  %s" % Math.mean(Math.sqrt(1 / beta)))
    return P
    
    
def pca(X=Math.array([]), no_dims=50):
    """Project the NxD array X onto its leading ``no_dims`` principal directions.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Data matrix, one point per row. It is mean-centred internally; the
        caller's array is not modified.
    no_dims : int
        Number of components to keep. If it exceeds d, all d components are
        returned.

    Returns
    -------
    Y : ndarray, shape (n, min(no_dims, d))
        Centred data projected onto the eigenvectors of X^T X with the largest
        eigenvalues, in order of decreasing eigenvalue. The sign of each
        column is arbitrary.
    """
    print("Preprocessing the data using PCA...")
    X = X - Math.mean(X, 0)
    # X^T X is symmetric, so use eigh: it returns real eigenpairs sorted by
    # ascending eigenvalue. The general eig() gives no ordering guarantee (and
    # may return complex values), so slicing its first columns does not
    # reliably select the top components.
    (_, M) = Math.linalg.eigh(Math.dot(X.T, X))
    M = M[:, ::-1]  # largest eigenvalue first
    Y = Math.dot(X, M[:, 0:no_dims])
    return Y


def tsne(X=Math.array([]), no_dims=2, initial_dims=50, perplexity=30.0, max_iter=1000):
    """Run t-SNE on the NxD array X to reduce it to ``no_dims`` dimensions.

    The call syntax is Y = tsne.tsne(X, no_dims, initial_dims, perplexity),
    where X is an NxD NumPy array of dtype float64.

    Parameters
    ----------
    X : ndarray, shape (n, d), dtype float64
        Input data, one point per row.
    no_dims : int
        Dimensionality of the embedding. NOTE: this is not validated here.
    initial_dims : int
        Number of PCA components kept before t-SNE is applied.
    perplexity : float
        Target perplexity passed on to x2p.
    max_iter : int
        Number of gradient-descent iterations (default 1000).

    Returns
    -------
    Y : ndarray, shape (n, no_dims)
        The embedding, re-centred to zero mean after each iteration.
        If ``X.dtype`` is not float64, an error is printed and -1 is returned
        instead.

    Notes
    -----
    The embedding is initialised from an unseeded ``Math.random.randn``, so
    results differ between runs.
    """
    # Check inputs
    if X.dtype != "float64":
        print("Error: array X should have type float64.")
        return -1

    # Initialize variables
    X = pca(X, initial_dims).real
    n = X.shape[0]
    initial_momentum = 0.5
    final_momentum = 0.8
    eta = 500           # step size applied to the gain-scaled gradient
    min_gain = 0.01
    Y = Math.random.randn(n, no_dims)
    dY = Math.zeros((n, no_dims))
    iY = Math.zeros((n, no_dims))
    gains = Math.ones((n, no_dims))

    # Compute P-values: symmetrise, normalise to sum 1, then scale
    P = x2p(X, 1e-5, perplexity)
    P = P + Math.transpose(P)
    P = P / (Math.sum(P))
    P = P * 4                                    # early exaggeration
    P = Math.maximum(P, 1e-12)

    # Run iterations
    for it in range(max_iter):

        # Compute pairwise affinities (1 / (1 + squared distance), zero diagonal)
        sum_Y = Math.sum(Math.square(Y), 1)
        num = 1 / (1 + Math.add(Math.add(-2 * Math.dot(Y, Y.T), sum_Y).T, sum_Y))
        num[range(n), range(n)] = 0
        Q = num / Math.sum(num)
        Q = Math.maximum(Q, 1e-12)

        # Compute gradient
        PQ = P - Q
        for i in range(n):
            dY[i, :] = Math.sum(Math.tile(PQ[:, i] * num[:, i], (no_dims, 1)).T * (Y[i, :] - Y), 0)

        # Perform the update
        if it < 20:
            momentum = initial_momentum
        else:
            momentum = final_momentum
        # Grow the gain where gradient and previous step differ in sign
        # pattern, shrink it where they agree; never let it drop below min_gain.
        gains = (gains + 0.2) * ((dY > 0) != (iY > 0)) + (gains * 0.8) * ((dY > 0) == (iY > 0))
        gains[gains < min_gain] = min_gain
        iY = momentum * iY - eta * (gains * dY)
        Y = Y + iY
        Y = Y - Math.mean(Y, 0)

        # Compute current value of cost function
        if (it + 1) % 10 == 0:
            C = Math.sum(P * Math.log(P / Q))
            # Spacing matches the original comma-separated print
            print("Iteration  %s : error is  %s" % (it + 1, C))

        # Stop lying about P-values: undo the early exaggeration
        if it == 100:
            P = P / 4

    # Return solution
    return Y
        
    
if __name__ == "__main__":
    # Example driver: embed the samples in train_X4000.txt, dump the 2-D
    # coordinates to Y.txt and save/show a scatter plot coloured by label.
    print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
    print("Running example on 2,500 MNIST digits...")
    X = Math.loadtxt("train_X4000.txt")
    labels = Math.loadtxt("train_y4000.txt")
    # For a quick smoke test, truncate both arrays here,
    # e.g. X = X[:100] and labels = labels[:100].
    Y = tsne(X, 2, 38, 20.0)

    # One "x y" pair per line; the context manager closes the file even if a
    # write fails.
    with open('Y.txt', 'w') as fil:
        for row in Y:
            fil.write(str(row[0]) + ' ' + str(row[1]) + '\n')

    # One scatter series per class label 1..5 so the legend shows the labels
    colors = ['b', 'c', 'y', 'm', 'r']
    for label, color in zip(range(1, 6), colors):
        idx = [k for k, value in enumerate(labels) if value == label]
        Plot.scatter(Y[idx, 0], Y[idx, 1], 20, color=color, label=str(label))
    Plot.legend()
    Plot.savefig('figure4000.pdf')
    Plot.show()

  

免责声明:文章转载自《tsne降维可视化》仅用于学习参考。如对内容有疑问,请及时联系本站处理。

上篇maven入门基础:仓库概念和仓库分类(一)连接MySQL报 unblock with 'mysqladmin flush-hosts' 问题解决下篇

宿迁高防,2C2G15M,22元/月;香港BGP,2C5G5M,25元/月 雨云优惠码:MjYwNzM=

相关文章

基于WebGL架构的3D可视化平台—实现汽车行走路线演示

小车行走路线演示New VS Old 刚接触ThingJS的时候,写的一个小车开进小区的演示,今天又看了教程中有movePath这个方法就重新写了一遍,其中也遇到了一些问题,尤其突出的问题就是小车过弯的尴尬表现。 先给大家看看Old版本,Old版演示地址。 再看看New版本,AE86过弯不再笨拙顺畅无比,New版演示地址。 第二张效果图上可以看到由点连...

数据可视化之PowerQuery篇(七)Power Query应用技巧:批量更改列名

https://zhuanlan.zhihu.com/p/130460772 ​今天分享一个PowerQuery的小技巧,导入到PowerBI中的数据,如果想要更改数据的列名,可以在PQ编辑器中直接双击列名,如果需要修改的列名比较多,还可以批量修改,利用这个M函数:Table.RenameColumns 其实双击更改列名也是利用这个函数,以下面这个数据为例...

可视化GC日志分析工具

一、GC日志输出参数   前面通过-XX:+PrintGCDetails可以对GC日志进行打印,我们就可以在控制台查看,这样虽然可以查看GC的信息,但是并不直观,可以借助于第三方的GC日志分析工具进行查看。   在日志打印输出涉及到的参数如下:     ‐XX:+PrintGC 输出GC日志     ‐XX:+PrintGCDetails 输出G...

Git配置可视化的diff 和merge工具

Windows下使用Git,msysgit是首选,但是msysgit的shell实在不给力,大小不能更改,字体难看。所以,在Windows下,在Cygwin下使用Git是个很不错的选择。 我们在提交代码前,或是合并代码,查看代码修改时,经常要diff一下看看都有哪些修改内容,diff的输出,晦涩难懂,修改多了的时候,简直像天书一样。Git 1.7以后,有了...

使用tensorboard可视化模型

  Tensorboard是TF自带的可视化工具。它可以让我们从各个角度观察与修改模型,比如观察模型在训练时的loss动态变化曲线而无需在迭代完毕后再画图、绘制神经网络的结构图、调节超参数等。下面以最简单的形式展示tensorboard的常用功能。 开启tensorboard   打开命令行输入 tensorboard --logdir logs   然后...

178.vim 可视化模式(visual模式)

为了便于选取文本,VIM 引入了可视(Visual)模式。 可以使用以下三种模式: 用 v 命令进入的字符可视化模式(Characterwise visual mode)。文本选择是以字符为单位的。 用 V 命令进入的行可视化模式(Linewise visual mode)。文本选择是以行为单位的。 用 ctrl-V 进入的块可视化模式(Blockwise...