本文整理汇总了Python中tsne.bh_sne函数的典型用法代码示例。如果您正苦于以下问题：Python bh_sne函数的具体用法？Python bh_sne怎么用？Python bh_sne使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了bh_sne函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: extract_tsne_gather_feat
def extract_tsne_gather_feat(stage):
    """
    Extract tsne gather features, resuming from a given stage.

    Every step checkpoints its result under ``tmp2/`` and the checkpoint is
    re-read right after the step, so later steps behave the same whether the
    earlier step just ran or was skipped.

    Stages (a step runs when ``stage`` is less than or equal to its number):
        1 -- fit one TF-IDF vocabulary on the ``t_w2v``, ``q_w2v`` and
             ``d_w2v`` columns and pickle it to ``tmp2/tfidf_obj``.
        2 -- ``tsne_t_*`` features from ``t_w2v``, saved to ``tmp2/tsne_t``.
        3 -- ``tsne_qt_*`` features from ``t_w2v`` and ``q_w2v`` stacked side
             by side, saved to ``tmp2/tsne_qt``.
        4 -- ``tsne_desc_*`` features from ``d_w2v``; the full table is saved
             to ``tmp2/df_tsne_gather_feats.csv``.

    Note: python2 only.
    Better than func:extract_tsne_feat in cv, but worst in submission.

    Parameters
    ----------
    stage : int
        First stage to execute; the checkpoints of all earlier stages must
        already exist in ``tmp2/``.
    """
    df_w2vlem_join = pd.read_csv('tmp2/df_w2vlem_join.csv', index_col=0)
    n_rows = len(df_w2vlem_join)

    if stage <= 1:
        tfidf = TfidfVectorizer(ngram_range=(2,4), stop_words='english', min_df=2)
        # The vectorizer is fitted from files, so dump each text column first.
        df_w2vlem_join['t_w2v'].to_csv('tmp2/t_w2v', index=False)
        df_w2vlem_join['q_w2v'].to_csv('tmp2/q_w2v', index=False)
        df_w2vlem_join['d_w2v'].to_csv('tmp2/d_w2v', index=False)
        tfidf.set_params(input='filename')
        tfidf.fit(['tmp2/t_w2v','tmp2/q_w2v','tmp2/d_w2v'])
        # Switch back so transform() below accepts in-memory strings.
        tfidf.set_params(input='content')
        with open('tmp2/tfidf_obj', 'wb') as handle:
            cPickle.dump(tfidf, handle)
    with open('tmp2/tfidf_obj', 'rb') as handle:
        tfidf = cPickle.load(handle)
    X_t = tfidf.transform(df_w2vlem_join['t_w2v'].tolist())

    if stage <= 2:
        # Created here (not in stage 1) so that resuming at stage 2 works.
        df_feat = pd.DataFrame(index=df_w2vlem_join.index.values)
        svd = TruncatedSVD(n_components=100, random_state=2016)
        X_svd = svd.fit_transform(X_t)
        X_scaled = StandardScaler().fit_transform(X_svd)
        X_tsne = bh_sne(X_scaled)
        df_feat['tsne_t_1'] = X_tsne[:n_rows, 0]
        df_feat['tsne_t_2'] = X_tsne[:n_rows, 1]
        df_feat.to_csv('tmp2/tsne_t', index=False)
    # NOTE(review): the checkpoint is written with index=False, so the
    # reloaded frame carries a fresh 0..n-1 index -- confirm this is intended.
    df_feat = pd.read_csv('tmp2/tsne_t')

    if stage <= 3:
        print(df_feat)
        X_q = tfidf.transform(df_w2vlem_join['q_w2v'].tolist())
        X_tq = sp.hstack([X_t, X_q]).tocsr()
        svd = TruncatedSVD(n_components=50, random_state=2016)
        X_svd = svd.fit_transform(X_tq)
        X_scaled = StandardScaler().fit_transform(X_svd)
        X_tsne = bh_sne(X_scaled)
        df_feat['tsne_qt_1'] = X_tsne[:n_rows, 0]
        df_feat['tsne_qt_2'] = X_tsne[:n_rows, 1]
        df_feat.to_csv('tmp2/tsne_qt', index=False)
    df_feat = pd.read_csv('tmp2/tsne_qt')

    if stage <= 4:
        print(df_feat)
        X_d = tfidf.transform(df_w2vlem_join['d_w2v'].tolist())
        svd = TruncatedSVD(n_components=100, random_state=2016)
        X_svd = svd.fit_transform(X_d)
        X_scaled = StandardScaler().fit_transform(X_svd)
        X_tsne = bh_sne(X_scaled)
        df_feat['tsne_desc_1'] = X_tsne[:n_rows, 0]
        df_feat['tsne_desc_2'] = X_tsne[:n_rows, 1]
        df_tsne_feats = df_feat
        df_tsne_feats.to_csv('tmp2/df_tsne_gather_feats.csv')
开发者ID:amsqr,项目名称:hd,代码行数:59,代码来源:python2_tsne.py
示例2: test_seed
def test_seed():
    """Check that bh_sne gives identical output for identical seeds."""
    from tsne import bh_sne
    from sklearn.datasets import load_iris
    import numpy as np

    X = load_iris().data

    # Each call gets its own generator seeded with 0, and copy_data=True is
    # passed so both runs are given the same unmodified input.
    t1 = bh_sne(X, random_state=np.random.RandomState(0), copy_data=True)
    t2 = bh_sne(X, random_state=np.random.RandomState(0), copy_data=True)

    # array_equal also compares shapes, so a shape mismatch cannot slip
    # through via broadcasting as it could with np.all(t1 == t2).
    assert np.array_equal(t1, t2)
开发者ID:10XDev,项目名称:tsne,代码行数:13,代码来源:test_seed.py
示例3: fit_transform
def fit_transform(self, X):
    """Fit the reduction to ``X`` and return the reduced data

    Parameters
    ----------
    X : pandas.DataFrame
        A (n_samples, n_features) dataframe to both fit and transform

    Returns
    -------
    reduced : pandas.DataFrame
        The output of ``bh_sne`` for ``X``, carrying the same index as ``X``

    Raises
    ------
    ValueError
        If the input is not a pandas DataFrame (presumably raised by
        ``self._check_dataframe`` -- confirm against that helper)
    """
    from tsne import bh_sne

    self._check_dataframe(X)
    embedding = bh_sne(X)
    row_labels = X.index
    return pd.DataFrame(embedding, index=row_labels)
开发者ID:bobbybabra,项目名称:flotilla,代码行数:27,代码来源:decomposition.py
示例4: t_sne
def t_sne(obj):
    """Group rows by category and show a t-SNE plot for each large category.

    Each item ``d`` of ``obj`` is read as ``d[0]`` = label and ``d[1:]`` =
    feature values; the categories of an item come from
    ``parser().categories_item(d)``.  Points whose label equals 1 are drawn
    green ('g'), all others red ('r').  Categories with 100 items or fewer
    are reported as too small and skipped.

    NOTE(review): the original called ``t_sne`` itself with two arguments
    (a TypeError, since it accepts one) and then passed the whole category
    dict to ``np.array``.  The plotting now lives in ``_plot_tsne`` and runs
    once per category -- confirm this matches the intended behaviour.
    """
    p = parser()
    data_categories = {}
    label_categories = {}

    for d in obj:
        for c in p.categories_item(d):
            data_categories.setdefault(c, []).append(d[1:])
            label_categories.setdefault(c, []).append('g' if d[0] == 1 else 'r')

    print(len(data_categories))

    for c in data_categories:
        print('------------------------')
        print('%s (%d)' % (c, len(data_categories[c])))
        print('------------------------')
        if len(data_categories[c]) > 100:
            _plot_tsne(data_categories[c], label_categories[c])
        else:
            print('small dimensionality')


def _plot_tsne(data, labels):
    """Embed ``data`` with bh_sne and show a scatter plot coloured by ``labels``."""
    arr = np.array(data, dtype=np.float64)
    x2 = bh_sne(arr)
    plt.scatter(x2[:, 0], x2[:, 1], c=labels)
    plt.show()
开发者ID:jordansilva,项目名称:lorien,代码行数:29,代码来源:processor.py
示例5: getTsne
def getTsne(modelFile, outDir, NBOW2=True):
    """Write a 2-D t-SNE projection of a model's word embeddings to disk.

    The ``Wemb`` array is loaded from ``modelFile``, reduced to 50 dimensions
    with PCA, embedded with ``bh_sne`` and written to ``<outDir>/tsne`` as one
    "x y" pair per line.  When ``NBOW2`` is true, a weight per embedding row,
    ``sigmoid(dot(row, av))`` with ``av`` taken from the ``AVs`` array, is
    also written to ``<outDir>/wts``, one value per line.

    Parameters
    ----------
    modelFile : str
        Path passed to ``numpy.load``; must provide ``Wemb`` (and ``AVs``
        when ``NBOW2`` is true).
    outDir : str
        Existing directory that receives the ``tsne`` and ``wts`` files.
    NBOW2 : bool
        Whether to write the weights file as well.
    """
    pp = numpy.load(modelFile)
    wv = pp['Wemb'].copy()

    sklearn_pca = PCA(n_components=50)
    Y_sklearn = sklearn_pca.fit_transform(wv)
    # asarray(dtype='float64') replaces numpy.asfarray, which no longer
    # exists in NumPy 2.0; the resulting array is the same.
    Y_sklearn = numpy.asarray(Y_sklearn, dtype='float64')
    print("PCA transformation done ...")
    print("Waitig for t-SNE computation ...")
    reduced_vecs = bh_sne(Y_sklearn)

    # The with-block closes the file; the old bare `out.close` did nothing.
    with open(outDir + "/tsne", "w") as out:
        for point in reduced_vecs:
            out.write(str(point[0]) + " " + str(point[1]) + "\n")
    print("t-SNE written to file ...")

    if NBOW2:
        av = pp['AVs'].astype('float64').T[0]
        wts = [sigmoid(numpy.dot(row, av)) for row in wv]
        with open(outDir + "/wts", "w") as out:
            for wt in wts:
                out.write(str(wt) + "\n")
开发者ID:fangzheng354,项目名称:nbow2-text-class,代码行数:30,代码来源:drawFns.py
示例6: meta_pca_sne
def meta_pca_sne(exID, experiment_folder): # put exID back
    """Compute a t-SNE "meta" embedding for a stored experiment.

    The first result of ``DatabaseClient().query(exID)`` is fetched, its
    ``DATA.TSNE_DATA`` array is embedded with ``bh_sne``, a scatter plot is
    saved to ``<experiment_folder>/meta_pca/META`` and the flattened
    coordinates are stored back on the experiment as ``DATA.META``.

    Parameters
    ----------
    exID
        Experiment identifier handed to ``DatabaseClient.query``.
    experiment_folder : str
        Folder under which the ``meta_pca`` plot directory is created.

    Returns
    -------
    None
        Returns early, after printing "No results", when the query gives
        ``None``.
    """
    plot_subfolder = experiment_folder + "/meta_pca"
    plot_data_directory = check_create_directory(plot_subfolder)
    filename = "{}/META".format(plot_data_directory)

    # mongo stuff
    dbClient = DatabaseClient()
    filteredResults = dbClient.query(exID)
    if filteredResults is None:
        print("No results")
        return

    filteredId = filteredResults[0]['_id']
    experiment = dbClient.get(filteredId)

    np_list = np.asarray(experiment['DATA']['TSNE_DATA'])
    print("META shape:  %s" % (np_list.shape,))

    epochs = experiment['DATA']['EPOCH']
    layers = experiment['DATA']['LAYER']
    # One colour value per sample: the epoch, offset by a tenth of the layer.
    labels = np.asarray([epoch + (layer * 0.1)
                         for epoch, layer in zip(epochs, layers)])

    # NOTE(review): labels are cut to the first 500 entries while np_list is
    # cut to its first 500 *columns*; the scatter below needs one label per
    # row of np_list -- confirm the two stay aligned.
    labels = labels[:500]
    np_list = np_list[:,:500]

    # Cap the perplexity only when it is too large for the number of points.
    # The original test compared (perp / 3 - 1) against the point count,
    # which is practically always true, so the default of 10 was always
    # replaced by the cap.
    perp = 10.0
    no_data_shape = np_list.shape[0]
    max_perp = (no_data_shape / 3.0) - 1.0
    if perp > max_perp:
        perp = max_perp

    sne_co = bh_sne(np_list, perplexity=perp, theta=0.5)
    print("sne %s" % (sne_co.shape,))
    print("labels %s" % (labels,))

    plt.scatter(sne_co[:,0], sne_co[:,1], c=labels)
    plt.savefig(filename, dpi=120)
    plt.close()
    print("show")

    # Stored as one flat list of interleaved coordinates.
    flat_coords = np.reshape(sne_co, (1,-1)).tolist()[0]
    experiment['DATA']['META'] = flat_coords
    dbClient.update(filteredId, experiment)
开发者ID:ssfg,项目名称:nnvis,代码行数:60,代码来源:neural_net_saving.py
示例7: perform_tsne_transformation
def perform_tsne_transformation(X):
    """Return the bh_sne embedding (perplexity 5) of ``X``.

    ``X`` is converted to a float64 array and everything after its first
    axis is flattened, so each entry along the first axis becomes one row.
    """
    # bh_sne is used instead of scikit-learn's TSNE because of a bug in the
    # scikit-learn implementation (per the original author's note).
    as_float = np.asarray(X).astype('float64')
    n_rows = as_float.shape[0]
    flattened = as_float.reshape((n_rows, -1))
    return bh_sne(flattened, perplexity=5)
开发者ID:till-tomorrow,项目名称:Conversation-Bot,代码行数:9,代码来源:word_embeddings.py
示例8: tsne
def tsne(embedding, word_2_id, sample_size = 1000):
    """Show a 2-D t-SNE map annotated with a random sample of words.

    Parameters
    ----------
    embedding : numpy.ndarray
        Embedding matrix; it is cast to float64 before being given to bh_sne.
    word_2_id : dict
        Maps each word to its row in ``embedding``.
    sample_size : int
        Number of words to annotate; capped at the vocabulary size.
    """
    embedding_2d = bh_sne(embedding.astype(np.float64))

    # random.sample needs a sequence, and asking for more items than exist
    # raises ValueError, so materialise the keys and cap the sample size.
    vocabulary = list(word_2_id)
    keys = random.sample(vocabulary, min(sample_size, len(vocabulary)))

    fig, ax = plt.subplots()
    # NOTE(review): annotations alone may not rescale the axes, so words
    # outside the default view could be invisible -- confirm on real data.
    for word in keys:
        row = word_2_id[word]
        ax.annotate(word, (embedding_2d[row, 0], embedding_2d[row, 1]))
    plt.show()
开发者ID:liusiqi43,项目名称:ox-computational-linguistics,代码行数:9,代码来源:visualise.py
示例9: visualize
def visualize(vecs):
    """Reduce ``vecs`` with bh_sne and show the resulting points.

    The points drawn are the rows of the t-SNE output.  The original drew
    the first two columns of the unreduced input, which left the reduction
    unused.
    """
    print("Got the vectors, now doing dimesnion reduction...")
    reduced = bh_sne(vecs)
    print("Reduction done, now plotting: ")
    for point in reduced:
        plt.plot(point[0], point[1], marker='o', markersize=8)
    plt.show()
开发者ID:bitliner,项目名称:Automatic-Extraction-of-Most-Relevant-Insights-From-Customer-Reviews,代码行数:9,代码来源:visualization.py
示例10: extract_tsne_feat
def extract_tsne_feat():
    """
    Extract tsne features.

    Three pairs of columns are computed from ``tmp2/df_w2vlem_join.csv``:
    ``tsne_t_*`` from ``t_w2v``, ``tsne_qt_*`` from the ``t_w2v`` and
    ``q_w2v`` TF-IDF matrices stacked side by side, and ``tsne_desc_*`` from
    ``d_w2v``.  Each text column gets its own freshly fitted TF-IDF
    vectorizer.  Intermediate tables go to ``tmp2/tsne_t`` and
    ``tmp2/tsne_qt``; the final table goes to ``tmp2/df_tsne_feats.csv``.

    Note: python2 only.
    """
    source = pd.read_csv('tmp2/df_w2vlem_join.csv', index_col=0)
    n_rows = len(source)
    df_feat = pd.DataFrame(index=source.index.values)

    # t_w2v on its own
    X_t = _tfidf_matrix(source['t_w2v'].tolist(), (1,4))
    embedding = _svd_scale_tsne(X_t, 100)
    df_feat['tsne_t_1'] = embedding[:n_rows, 0]
    df_feat['tsne_t_2'] = embedding[:n_rows, 1]
    df_feat.to_csv('tmp2/tsne_t', index=False)
    print(df_feat)

    # t_w2v and q_w2v side by side
    X_q = _tfidf_matrix(source['q_w2v'].tolist(), (1,4))
    embedding = _svd_scale_tsne(sp.hstack([X_t, X_q]).tocsr(), 100)
    df_feat['tsne_qt_1'] = embedding[:n_rows, 0]
    df_feat['tsne_qt_2'] = embedding[:n_rows, 1]
    df_feat.to_csv('tmp2/tsne_qt', index=False)
    # Continue from the table as it was written (saved without its index).
    df_feat = pd.read_csv('tmp2/tsne_qt')
    print(df_feat)

    # d_w2v on its own
    X_d = _tfidf_matrix(source['d_w2v'].tolist(), (1,3))
    embedding = _svd_scale_tsne(X_d, 70)
    df_feat['tsne_desc_1'] = embedding[:n_rows, 0]
    df_feat['tsne_desc_2'] = embedding[:n_rows, 1]
    df_feat.to_csv('tmp2/df_tsne_feats.csv')


def _tfidf_matrix(texts, ngram_range):
    """Fit a new English-stop-word TF-IDF vectorizer on ``texts`` and transform them."""
    vectorizer = TfidfVectorizer(ngram_range=ngram_range, stop_words='english', min_df=2)
    return vectorizer.fit_transform(texts)


def _svd_scale_tsne(matrix, n_components):
    """Apply truncated SVD, standard scaling and bh_sne to ``matrix``, in that order."""
    reducer = TruncatedSVD(n_components=n_components, random_state=2016)
    reduced = reducer.fit_transform(matrix)
    standardized = StandardScaler().fit_transform(reduced)
    return bh_sne(standardized)
开发者ID:amsqr,项目名称:hd,代码行数:44,代码来源:python2_tsne.py
示例11: test_iris
def test_iris():
    """Smoke-test bh_sne on the iris data set."""
    from tsne import bh_sne
    from sklearn.datasets import load_iris

    X = load_iris().data
    X_2d = bh_sne(X)

    # With default arguments the embedding is expected to have one
    # 2-D point per input row.
    assert X_2d.shape == (X.shape[0], 2)
开发者ID:10XDev,项目名称:tsne,代码行数:10,代码来源:test_iris.py
示例12: _tsne
def _tsne(X, dir_str="*.wav", perplexity=3, plotting=False):
    """
    Utility function to compute tsne

    Parameters
    ----------
    X : array
        Data handed to ``bh_sne``.
    dir_str : str
        Glob pattern; the sorted matching file names are used as point
        labels, by position, when plotting.
    perplexity : float
        Perplexity handed to ``bh_sne``.
    plotting : bool
        When true, draw the embedding in a new figure with one text label
        per point.

    Returns
    -------
    Z : array
        The ``bh_sne`` embedding of ``X``.
    """
    Z = bh_sne(X, perplexity=perplexity)
    if plotting:
        # File names are only needed for the labels, so glob lazily.
        flist = sorted(glob.glob(dir_str))
        figure()
        plot(Z[:,0], Z[:,1],'r.')
        # A plain loop replaces the nested list comprehension that was used
        # purely for its side effects.
        for i, p in enumerate(Z):
            text(p[0], p[1], '%s' % flist[i], fontsize=12)
    return Z
开发者ID:bregmanstudio,项目名称:voxid,代码行数:11,代码来源:voweltimbre.py
示例13: visualize_tsne
def visualize_tsne():
    """
    play around with tsne to visualize image space

    For every ``category/user`` directory under the image root, the tracker
    rows whose ``img_id`` is in that directory are merged (on ``shortcode``)
    with the user's fc7 feature pickle.  All fc7 vectors are stacked,
    embedded with bh_sne and shown as a scatter plot coloured by username.
    """
    import matplotlib.pyplot as plt
    from tsne import bh_sne

    img_root = '/Volumes/micro/recommend-a-graham/imgs/'
    tracker_df = pd.read_pickle('./tracker.pkl')

    dfs = []
    for category in listdir(img_root):
        for user in listdir(img_root + category):
            img_ids = listdir('/Volumes/micro/recommend-a-graham/imgs/{}/{}/'.format(category, user))
            # isin() replaces a per-row `x in list` scan.
            sub_df = tracker_df[tracker_df.img_id.isin(img_ids)]
            user_df = pd.read_pickle('../fc7_pkls/fc7_{}.pkl'.format(user))
            user_df = user_df[user_df.shortcode.isin(sub_df.shortcode)]
            dfs.append(pd.merge(sub_df, user_df, on='shortcode'))

    dfs = pd.concat(dfs, axis=0)
    dfs.reset_index(inplace=True)

    # Stack every fc7 vector as one row with a single concatenate call;
    # concatenating inside a loop re-copies the growing array each time.
    x_data = np.concatenate(
        [vec.reshape(1, vec.shape[0]) for vec in dfs.fc7.values], axis=0)
    print(x_data.shape)

    usernames = dfs.username.unique()
    y_dict = {k:i for i,k in enumerate(usernames)}
    y_data = dfs.username.apply(lambda x: y_dict[x]).values

    vis_data = bh_sne(x_data)
    vis_x = vis_data[:,0]
    vis_y = vis_data[:,1]

    # NOTE(review): 28 colour bins and the 0..29 colour range are hard-coded;
    # presumably one bin per user -- confirm against the data set.
    plt.scatter(vis_x, vis_y, c=y_data, cmap=plt.cm.get_cmap("jet", 28))
    cbar = plt.colorbar()
    cbar.set_ticks([i*29./28 + 29./56 for i in range(28)])
    # list() so the labels are a real sequence on Python 3 as well.
    cbar.set_ticklabels(list(zip(usernames, [user_cat_dict[i] for i in usernames])))
    plt.clim(0, 29)
    plt.title('tsne, fc7, 100img_per_user, 4user_per_categ')
    plt.show()
开发者ID:theod07,项目名称:recommend-a-graham,代码行数:51,代码来源:tfidf_fc8.py
示例14: run
def run(self):
    """Embed the sampled feature vectors with bh_sne and write them as TSV.

    Vectors are read from the ``vecs_with_id`` sample file, restricted to
    ``getSampleIds()``; the output rows (index, x, y) go to the
    ``article_embedding`` sample file.
    """
    config = Config.get()

    # Create the embedding.
    vecs_path = config.getSample("ExternalFiles", "vecs_with_id")
    featureDict = Utils.read_features(vecs_path, id_set=getSampleIds())

    # Freeze the key order so ids and coordinates stay aligned.
    keys = list(featureDict.keys())
    vectors = np.array([featureDict[vID]["vector"] for vID in keys])

    theta = config.getfloat("PreprocessingConstants", "tsne_theta")
    out = bh_sne(vectors, pca_d=None, theta=theta)

    X = list(out[:, 0])
    Y = list(out[:, 1])
    embedding_path = config.getSample("ExternalFiles", "article_embedding")
    Utils.write_tsv(embedding_path, ("index", "x", "y"), keys, X, Y)
开发者ID:Bboatman,项目名称:proceduralMapGeneration,代码行数:14,代码来源:Coordinates.py
示例15: extract_w2v_tsne_feat
def extract_w2v_tsne_feat():
    """
    Extract w2v tsne features.

    Reads ``tmp2/df_w2v_feats.csv``, standard-scales its values, embeds them
    with bh_sne and writes the two coordinates as ``tsne_t_1`` / ``tsne_t_2``
    to ``tmp2/df_tsne_w2v_feats.csv``.

    Note: python2 only. Worst in cv, so do not use this.
    """
    df_w2v_feats = pd.read_csv('tmp2/df_w2v_feats.csv', index_col=0)
    n_rows = len(df_w2v_feats)

    df_feat = pd.DataFrame(index=df_w2v_feats.index.values)
    embedding = bh_sne(StandardScaler().fit_transform(df_w2v_feats.values))

    for axis, column in enumerate(('tsne_t_1', 'tsne_t_2')):
        df_feat[column] = embedding[:n_rows, axis]
    df_feat.to_csv('tmp2/df_tsne_w2v_feats.csv')
开发者ID:amsqr,项目名称:hd,代码行数:15,代码来源:python2_tsne.py
示例16: make_sample_df
def make_sample_df(labels, np, labeled_data, limit, algorithm_name, dims, cores):
    """Build one 2-D t-SNE sample frame for each of the first three labels.

    For each of the first three unique values in ``labels``, the rows of
    ``labeled_data`` whose first column equals that value are selected
    (without that column), shuffled, cut to at most ``limit`` rows and
    embedded with bh_sne.

    Parameters
    ----------
    labels : array
        Label values; only ``np.unique(labels)[0:3]`` are used.
    np : module
        The numpy module, passed in by the caller.
    labeled_data : array
        2-D array whose first column holds the label of each row.
    limit : int
        Maximum number of rows sampled per label.
    algorithm_name
        Value repeated in the ``algorithm`` column.
    dims
        Value repeated in the ``dimension`` column.
    cores
        Not used by this function.

    Returns
    -------
    list of pandas.DataFrame
        One frame per label with columns X, Y, dimension, label, algorithm.
    """
    frames = []
    for label in np.unique(labels)[0:3]:
        # rows carrying this label, with the label column dropped
        rows = labeled_data[labeled_data[:,0] == label,1:]
        n_available = rows.shape[0]

        # random sub-sample of at most `limit` rows
        order = np.arange(n_available)
        np.random.shuffle(order)
        chosen = order[:min(limit, n_available)]

        data_2d = bh_sne(rows[chosen,:])
        count = data_2d.shape[0]
        frames.append(pd.DataFrame({
            "X": data_2d[:,0],
            "Y": data_2d[:,1],
            "dimension": [dims] * count,
            "label": [label_dict[label] for _ in range(count)],
            "algorithm": [algorithm_name] * count,
        }))
    return frames
开发者ID:lzamparo,项目名称:SdA_reduce,代码行数:15,代码来源:two_d_samples_h5_SdA_csv.py
示例17: main
def main(datafile, normalize, ndims, copula, clusteroutput, subsample):
    """Embed a random subsample with t-SNE, cluster it and report the result.

    Steps: optionally standardise the features, draw ``subsample`` random
    rows (with replacement), optionally apply ``copula_transform`` to each
    column, embed with bh_sne, cluster the embedding with DBSCAN, print
    feature importances from an extra-trees classifier trained to predict
    the cluster, write "id,cluster" lines to ``clusteroutput`` and show a
    scatter plot coloured by cluster.

    Parameters
    ----------
    datafile : str
        Path handed to ``read_sah_h5``.
    normalize : bool
        Standardise every feature to zero mean and unit variance.
    ndims : int
        Number of output dimensions for bh_sne.
    copula : bool
        Apply ``copula_transform`` to every feature column.
    clusteroutput : str
        Path of the "id,cluster" output file.
    subsample : int
        Number of rows to draw.
    """
    X, features = read_sah_h5(datafile)
    # NOTE(review): `I` is never used, while the 'id' column index is looked
    # up in `all_features` but applied to `X`.  If `X` holds only the "good"
    # feature columns this picks the wrong column -- confirm against
    # read_sah_h5 whether `I` should be indexed instead.
    I, all_features = read_sah_h5(datafile, just_good=False)
    if 'id' in all_features:
        ids = X[:, all_features.index('id')]
    else:
        ids = np.arange(len(X)).astype(int)

    if normalize:
        mean = np.average(X, axis=0)
        std = np.std(X, axis=0)
        std[np.nonzero(std == 0.0)] = 1.0 # Avoid NaNs
        X = (X - mean) / std

    idx = np.random.randint(len(X), size=subsample)
    X = X[idx]
    ids = ids[idx]

    if copula:
        X = np.column_stack([copula_transform(x) for x in X.T])

    # I added this for the time/freq clustering
    # to emphasize the frequency feature
    # X[:, 1] *= 1e-3

    Y = bh_sne(X, d=ndims)

    dbscan = DBSCAN(eps=1.75, min_samples=5)
    C = dbscan.fit_predict(Y)

    tree = ExtraTreesClassifier(n_estimators=100)
    tree.fit(X, C)
    for name, importance in zip(features, tree.feature_importances_):
        print('%s: %f' % (name, importance))

    with open(clusteroutput, 'w+') as out:
        for cluster, sample_id in zip(C, ids):
            out.write('%d,%d\n' % (sample_id, cluster))

    # Colours are cluster label / largest label.  Clamp the divisor to 1 so
    # that a single cluster (largest label 0) does not divide by zero.
    color_scale = float(max(np.max(C), 1))
    pl.scatter(Y[:, 0], Y[:, 1], color=pl.cm.spectral(C.astype(float) / color_scale))
    # Zero-size bars exist only to give each cluster a legend entry.
    for c in np.unique(C):
        pl.bar(0, 0, lw=0, ec='none', fc=pl.cm.spectral(float(c) / color_scale), label='Cluster %d' % c)
    pl.legend()
    pl.show()
开发者ID:UCBerkeleySETI,项目名称:blml,代码行数:48,代码来源:tsne_dimensionality_reduction.py
示例18: _fit_transform
def _fit_transform(self, x_in):
    """ fit to data, and return the transform

    Args:
        x_in (numpy.array): Input numpy array; it is cast to float first

    Returns:
        numpy.array: Result of ``bh_sne`` run with this object's
        ``perplexity`` and ``theta``
    """
    as_float = x_in.astype(float)
    settings = {'perplexity': self.perplexity, 'theta': self.theta}
    return _tsne.bh_sne(as_float, **settings)
开发者ID:BioinformaticsArchive,项目名称:synergy-maps,代码行数:16,代码来源:reduction_methods.py
示例19: process_files
def process_files(in_file, out_file):
    """
    Read data from in_file, and output to out_file

    Each input line is cleaned with ``clean_line`` and split on whitespace:
    the first token is the word and the remaining tokens are its embedding
    values.  The embeddings are reduced with bh_sne and written to
    ``out_file`` as one "x y" line per input line.  Progress is reported on
    stderr.

    Raises
    ------
    ValueError
        If ``in_file`` contains no lines.
    """
    sys.stderr.write('# in_file = %s, out_file = %s\n' % (in_file, out_file))

    # input
    sys.stderr.write('# Input from %s.\n' % (in_file))
    with codecs.open(in_file, 'r', 'utf-8') as inf:
        all_lines = inf.readlines()

    # output
    sys.stderr.write('Output to %s\n' % out_file)
    check_dir(out_file)

    num_words = len(all_lines)
    sys.stderr.write('# Processing file %s ...\n' % (in_file))
    sys.stderr.write('# num words = %d\n' % (num_words))
    if num_words == 0:
        raise ValueError('no embeddings found in %s' % in_file)

    X = None
    for line_id, line in enumerate(all_lines):
        tokens = re.split(r'\s+', clean_line(line))
        if line_id == 0:
            # The first line fixes the dimensionality for the whole file.
            num_dim = len(tokens) - 1
            sys.stderr.write('# num dims = %d\n' % (num_dim))
            X = np.zeros((num_words, num_dim))
        # Parse straight to float64.  Going through dtype '|S4' first, as the
        # original did, cut every number down to its first four characters.
        X[line_id, :] = np.array(tokens[1:], dtype=np.float64)

        processed = line_id + 1
        if (processed % 10000 == 0):
            sys.stderr.write(' (%d) ' % processed)
    sys.stderr.write('Done! Num lines = %d\n' % num_words)

    X_2d = bh_sne(X)

    # Opened only once the embedding exists, and closed even on error.
    with codecs.open(out_file, 'w', 'utf-8') as ouf:
        for point in X_2d:
            ouf.write('%f %f\n' % (point[0], point[1]))
开发者ID:alphadl,项目名称:nmt.hybrid,代码行数:46,代码来源:visual.py
示例20: get_tsne_mapping
def get_tsne_mapping(materials_list=None):
    """Return the t-SNE point for every material, using an on-disk cache.

    If ``tsne_points.pickle`` can be opened in the working directory its
    contents are returned as-is.  Otherwise the materials are vectorised
    with ``vectorize_random(4)``, embedded with bh_sne, plotted with
    ``plot_tsne``, and the resulting mapping is pickled for the next call.

    Parameters
    ----------
    materials_list : list, optional
        Materials to embed; defaults to ``get_materials_list()``.

    Returns
    -------
    list of dict
        One ``{'pt': point, 'material': material}`` entry per material when
        freshly computed; otherwise whatever the cache file contains.
    """
    if materials_list is None:
        # Doesn't call get_materials_list() when module is loaded
        materials_list = get_materials_list()

    # NOTE(review): the cache is returned without checking that it was built
    # from the same materials_list -- confirm callers rely on that.
    try:
        _log.info('Trying data cache for t-SNE mapping')
        # Pickle data is bytes: the file must be opened in binary mode
        # (text mode fails on Python 3 and can corrupt data on Windows).
        with open('tsne_points.pickle', 'rb') as f:
            _log.info('Using pickled t-SNE points')
            return pickle.load(f)
    except IOError:
        X = vectorize_random(4)(materials_list)
        X_2d = bh_sne(X)
        _log.info('t-SNE plot at {}'.format(plot_tsne(X_2d)))
        point_map = [{'pt': pt, 'material': m} for pt, m in
                     zip(X_2d, materials_list)]
        with open('tsne_points.pickle', 'wb') as f:
            pickle.dump(point_map, f)
        return point_map
开发者ID:HGeerlings,项目名称:eratosthenes,代码行数:18,代码来源:neigh_perf.py
注：本文中的tsne.bh_sne函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论