This page collects typical usage examples of the Python function utils.time_utils._timestamp. If you are wondering what _timestamp does and how to use it, the hand-picked examples below should help.
It shows 20 code examples of _timestamp, sorted by popularity by default. You can vote up the examples you like or find useful; votes help the system recommend better Python code examples.
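All snippets below are excerpts from larger files and omit their import headers: they assume modules such as config, pandas (pd), rdflib (g), gensim, and each project's own utils package (time_utils, pkl_utils, logging_utils) are already in scope, and they are written for Python 2 (note the print statements). The implementation of _timestamp itself is not shown on this page. Judging only from how the examples use it, as a filename-safe time string, a minimal stand-in might look like the sketch below; the exact format string is an assumption:

    # Hypothetical stand-in for utils/time_utils.py; the real implementation
    # is not shown on this page, and this format string is an assumption.
    import datetime

    def _timestamp():
        # e.g. "2016_04_25_10_30_05"; underscore-separated so the result can
        # be embedded directly in log and output file names, as the examples do
        return datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")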
Example 1: Article2Template
def Article2Template(lang="en"):
print "[%s]: generate article2template dict for language %s" % (time_utils._timestamp(), lang)
infile = open(config.ARTICLE_TEMPLATES[lang])
prefix = config.LANG_PREFIX[lang]
len_prefix = len(prefix)
articleDict = {}
for line in infile.readlines():
if line[0] != "<":
continue
row = line.split()
article = row[0][1:-1]
template = row[2][1:-1]
article = article[len_prefix:]
template = template[len_prefix:]
if "/" in template:
continue
if article in articleDict:
articleDict[article].append(template)
else:
articleDict[article] = [template, ]
print "%d articles in total" % len(articleDict)
pkl_utils._save(config.ARTICLE2TEMPLATE[lang], articleDict)
print "[%s]: generation complete" % time_utils._timestamp()
Developer: dbpedia, Project: mappings-autogeneration, Lines: 25, Source file: parse.py
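The two-branch accumulation into articleDict above is the classic dict-of-lists pattern; for comparison, collections.defaultdict from the standard library collapses it to a single line (a sketch of the alternative, not how the project itself writes it):

    from collections import defaultdict

    articleDict = defaultdict(list)
    # inside the parsing loop, the if/else append from the example becomes:
    articleDict[article].append(template)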
Example 2: process
def process(lang, pivot):
    print "[%s]: process for language %s" % (time_utils._timestamp(), lang)
    linkDict = pkl_utils._load(config.ILL_DICT["%s2%s" % (lang, pivot)])
    templateDict = pkl_utils._load(config.TEMPLATE2ARTICLE[lang])
    articleDict = pkl_utils._load(config.ARTICLE2TEMPLATE[pivot])
    mapping = pd.read_csv(config.EXISTING_MAPPING_OUTPUT[pivot], index_col="template")
    template1 = []; template2 = []
    article1 = []; article2 = []; ontology = []
    for template in templateDict:
        articles = templateDict[template]
        for article in articles:
            if article in linkDict:
                tmp = linkDict[article]
                template1.append(template)
                article1.append(article)
                article2.append(tmp)
                if tmp in articleDict:
                    templateList = articleDict[tmp]
                else:
                    templateList = []
                c = ""
                t = ""
                for Template in templateList:
                    if Template in mapping.index:
                        c = mapping.at[Template, "ontology"]
                        t = Template
                template2.append(t)
                ontology.append(c)
    data = {"template1": template1, "article1": article1, "template2": template2,
            "article2": article2, "ontology": ontology}
    df = pd.DataFrame(data)
    df.to_csv(config.ENTITY_MATRIX["%s2%s" % (lang, pivot)], index=False)
    print "[%s]: processing complete" % time_utils._timestamp()
Developer: dbpedia, Project: mappings-autogeneration, Lines: 34, Source file: parse.py
Example 3: __init__
def __init__(self, model_folder, model_list, subm_prefix,
             weight_opt_max_evals=10, w_min=-1., w_max=1.,
             inst_subsample=0.5, inst_subsample_replacement=False,
             inst_splitter=None,
             model_subsample=1.0, model_subsample_replacement=True,
             bagging_size=10, init_top_k=5, epsilon=0.00001,
             multiprocessing=False, multiprocessing_num_cores=1,
             enable_extreme=True, random_seed=0):
    self.model_folder = model_folder
    self.model_list = model_list
    self.subm_prefix = subm_prefix
    self.weight_opt_max_evals = weight_opt_max_evals
    self.w_min = w_min
    self.w_max = w_max
    assert inst_subsample > 0 and inst_subsample <= 1.
    self.inst_subsample = inst_subsample
    self.inst_subsample_replacement = inst_subsample_replacement
    self.inst_splitter = inst_splitter
    assert model_subsample > 0
    assert (type(model_subsample) == int) or (model_subsample <= 1.)
    self.model_subsample = model_subsample
    self.model_subsample_replacement = model_subsample_replacement
    self.bagging_size = bagging_size
    self.init_top_k = init_top_k
    self.epsilon = epsilon
    self.multiprocessing = multiprocessing
    self.multiprocessing_num_cores = multiprocessing_num_cores
    self.enable_extreme = enable_extreme
    self.random_seed = random_seed
    logname = "ensemble_selection_%s.log" % time_utils._timestamp()
    self.logger = logging_utils._get_logger(config.LOG_DIR, logname)
    self.n_models = len(self.model_list)
Developer: Anhmike, Project: Kaggle_HomeDepot, Lines: 33, Source file: extreme_ensemble_selection.py
Example 4: main
def main():
    logname = "generate_feature_group_relevance_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfTrain = dfAll.iloc[:TRAIN_SIZE].copy()
    ## run python3 splitter.py first
    split = pkl_utils._load("%s/splits_level1.pkl" % config.SPLIT_DIR)
    n_iter = len(split)
    ## for cv
    for i in range(n_iter):
        trainInd, validInd = split[i][0], split[i][1]
        dfTrain2 = dfTrain.iloc[trainInd].copy()
        sub_feature_dir = "%s/Run%d" % (config.FEAT_DIR, i+1)
        obs_fields = ["search_term", "product_title"][1:]
        aggregation_mode = ["mean", "std", "max", "min", "median", "size"]
        param_list = [dfAll["id"], dfTrain2, aggregation_mode]
        sf = StandaloneFeatureWrapper(GroupRelevance, dfAll, obs_fields, param_list, sub_feature_dir, logger)
        sf.go()
    ## for all
    sub_feature_dir = "%s/All" % (config.FEAT_DIR)
    obs_fields = ["search_term", "product_title"][1:]
    aggregation_mode = ["mean", "std", "max", "min", "median", "size"]
    param_list = [dfAll["id"], dfTrain, aggregation_mode]
    sf = StandaloneFeatureWrapper(GroupRelevance, dfAll, obs_fields, param_list, sub_feature_dir, logger)
    sf.go()
Developer: Anhmike, Project: Kaggle_HomeDepot, Lines: 29, Source file: feature_group_relevance.py
Example 5: run_tsne_lsa_ngram
def run_tsne_lsa_ngram():
    logname = "generate_feature_tsne_lsa_ngram_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfAll.drop(["product_attribute_list"], inplace=True, axis=1)
    generators = [TSNE_LSA_Word_Ngram, TSNE_LSA_Char_Ngram]
    ngrams_list = [[1,2,3], [2,3,4,5]]
    ngrams_list = [[1,2,3], [4]]  # note: this second assignment overrides the one above
    obs_fields = ["search_term", "search_term_alt", "search_term_auto_corrected", "product_title", "product_description"]
    for generator, ngrams in zip(generators, ngrams_list):
        for ngram in ngrams:
            param_list = [ngram, config.SVD_DIM, config.SVD_N_ITER]
            sf = StandaloneFeatureWrapper(generator, dfAll, obs_fields, param_list, config.FEAT_DIR, logger, force_corr=True)
            sf.go()
    generators = [TSNE_LSA_Word_Ngram_Pair]
    ngrams = [1, 2]
    obs_fields_list = []
    target_fields_list = []
    obs_fields_list.append(["search_term", "search_term_alt", "search_term_auto_corrected"])
    target_fields_list.append(["product_title", "product_description"])
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        for ngram in ngrams:
            for generator in generators:
                param_list = [ngram, config.SVD_DIM, config.SVD_N_ITER]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger, force_corr=True)
                pf.go()
Developer: amsqr, Project: Kaggle_HomeDepot, Lines: 28, Source file: feature_vector_space.py
Example 6: run_count
def run_count():
    logname = "generate_feature_first_last_ngram_count_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    generators = [
        FirstIntersectCount_Ngram,
        LastIntersectCount_Ngram,
        FirstIntersectRatio_Ngram,
        LastIntersectRatio_Ngram,
    ]
    obs_fields_list = []
    target_fields_list = []
    ## query in document
    obs_fields_list.append(["search_term", "search_term_product_name", "search_term_alt", "search_term_auto_corrected"])
    target_fields_list.append(["product_title", "product_title_product_name", "product_description", "product_attribute", "product_brand", "product_color"])
    ## document in query
    obs_fields_list.append(["product_title", "product_title_product_name", "product_description", "product_attribute", "product_brand", "product_color"])
    target_fields_list.append(["search_term", "search_term_product_name", "search_term_alt", "search_term_auto_corrected"])
    ngrams = [1,2,3,12,123][:3]
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        for generator in generators:
            for ngram in ngrams:
                param_list = [ngram]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger)
                pf.go()
Developer: amsqr, Project: Kaggle_HomeDepot, Lines: 27, Source file: feature_first_last_ngram.py
Example 7: main
def main(options):
    logname = "[Feat@%s]_[Learner@%s]_hyperopt_%s.log" % (
        options.feature_name, options.learner_name, time_utils._timestamp())
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    optimizer = TaskOptimizer(options.task_mode, options.learner_name,
                              options.feature_name, logger, options.max_evals,
                              verbose=True, refit_once=options.refit_once)
    optimizer.run()
Developer: yitang, Project: Kaggle_HomeDepot, Lines: 7, Source file: task.py
Example 8: main
def main():
    logname = "generate_feature_intersect_position_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    generators = [
        IntersectPosition_Ngram,
        IntersectNormPosition_Ngram,
    ]
    obs_fields_list = []
    target_fields_list = []
    ## query in document
    obs_fields_list.append(["search_term", "search_term_product_name", "search_term_alt", "search_term_auto_corrected"])
    target_fields_list.append(["product_title", "product_title_product_name", "product_description", "product_attribute", "product_brand", "product_color"][1:2])
    ## document in query
    obs_fields_list.append(["product_title", "product_title_product_name", "product_description", "product_attribute", "product_brand", "product_color"][1:2])
    target_fields_list.append(["search_term", "search_term_product_name", "search_term_alt", "search_term_auto_corrected"])
    ngrams = [1,2,3,12,123][:3]
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        for generator in generators:
            for ngram in ngrams:
                param_list = [ngram, aggregation_mode]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger)
                pf.go()
Developer: amsqr, Project: Kaggle_HomeDepot, Lines: 25, Source file: feature_intersect_position.py
Example 9: parse_args
def parse_args(parser):
    parser.add_option("-d", "--dim", default=1, type=int, dest="lsa_columns",
        help="lsa_columns")
    parser.add_option("-o", "--outfile", default="feature_conf_%s.py" % time_utils._timestamp(),
        type="string", dest="outfile", help="outfile")
    (options, args) = parser.parse_args()
    return options, args
Developer: Anhmike, Project: Kaggle_HomeDepot, Lines: 8, Source file: get_feature_conf_linear_stacking.py
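This and the other parse_args examples below receive an already-constructed optparse parser from their caller, which the snippets do not show. A hypothetical driver would look like:

    from optparse import OptionParser

    parser = OptionParser()
    options, args = parse_args(parser)
    print options.outfile  # e.g. "feature_conf_2016_04_25_10_30_05.py"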
Example 10: parse_args
def parse_args(parser):
    parser.add_option("-l", "--level", default=2,
        type="int", dest="level", help="level")
    parser.add_option("-t", "--top", default=10,
        type="int", dest="topN", help="top-N")
    parser.add_option("-o", "--outfile",
        default="stacking_feature_conf_%s.py" % time_utils._timestamp(),
        type="string", dest="outfile", help="outfile")
    (options, args) = parser.parse_args()
    return options, args
Developer: MrSnark, Project: Kaggle_HomeDepot, Lines: 10, Source file: get_stacking_feature_conf.py
Example 11: main
def main(which):
    logname = "generate_feature_stat_cooc_tfidf_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    generators = []
    for w in which.split(","):
        if w == "tf":
            generators.append(StatCoocTF_Ngram)
        elif w == "norm_tf":
            generators.append(StatCoocNormTF_Ngram)
        elif w == "tfidf":
            generators.append(StatCoocTFIDF_Ngram)
        elif w == "norm_tfidf":
            generators.append(StatCoocNormTFIDF_Ngram)
        elif w == "bm25":
            generators.append(StatCoocBM25_Ngram)
    obs_fields_list = []
    target_fields_list = []
    ## query in document
    obs_fields_list.append(["search_term", "search_term_alt", "search_term_auto_corrected"])
    target_fields_list.append(["product_title", "product_title_product_name", "product_description", "product_attribute", "product_brand", "product_color"])
    ## document in query
    obs_fields_list.append(["product_title", "product_title_product_name", "product_description", "product_attribute", "product_brand", "product_color"])
    target_fields_list.append(["search_term", "search_term_alt", "search_term_auto_corrected"])
    ngrams = [1,2,3,12,123][:3]
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        for generator in generators:
            for ngram in ngrams:
                param_list = [ngram, aggregation_mode]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger)
                pf.go()
    obs_fields_list = []
    target_fields_list = []
    ## query in document
    obs_fields_list.append(["search_term_product_name"])
    target_fields_list.append(["product_title_product_name"])
    ngrams = [1,2]
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        for generator in generators:
            for ngram in ngrams:
                if ngram == 2:
                    # product_name is two words long, so there is only one
                    # 2-gram and every aggregation mode returns the same value
                    param_list = [ngram, "mean"]
                else:
                    param_list = [ngram, aggregation_mode]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger)
                pf.go()
Developer: amsqr, Project: Kaggle_HomeDepot, Lines: 55, Source file: feature_stat_cooc_tfidf.py
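The which argument selects which co-occurrence statistics to generate. A hypothetical invocation (the snippet does not show how the script is driven, presumably from the command line):

    # generate only the TF and BM25 co-occurrence features
    main("tf,bm25")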
Example 12: __init__
def __init__(self, feature_dict, feature_name, feature_suffix=".pkl", corr_threshold=0):
self.feature_name = feature_name
self.feature_dict = feature_dict
self.feature_suffix = feature_suffix
self.corr_threshold = corr_threshold
self.feature_names_basic = []
self.feature_names_cv = []
self.basic_only = 0
logname = "feature_combiner_%s_%s.log"%(feature_name, time_utils._timestamp())
self.logger = logging_utils._get_logger(config.LOG_DIR, logname)
self.splitter = splitter_level1
self.n_iter = n_iter
Developer: MrSnark, Project: Kaggle_HomeDepot, Lines: 12, Source file: feature_combiner.py
Example 13: getExistingMapping
def getExistingMapping(lang="en"):
print "[%s]: parse existing mapping for language %s" % (time_utils._timestamp(), lang)
G = g.Graph()
G.parse(config.EXISTING_MAPPING[lang], format="n3")
q = '''
PREFIX rr: <http://www.w3.org/ns/r2rml#>
SELECT ?template ?class
WHERE {
?template rr:subjectMap ?mapping .
?mapping rr:class ?class .
}
'''
results = G.query(q)
mapping = [row[0] for row in results]
ontology = [row[1] for row in results]
df = pd.DataFrame({'mapping':mapping, 'ontology':ontology})
df["template"] = df["mapping"].apply(lambda x: config.TEMPLATE_NAME[lang] + x[47:])
df.to_csv(config.EXISTING_MAPPING_OUTPUT[lang], index=False)
print "[%s]: parsing complete" % time_utils._timestamp()
Developer: dbpedia, Project: mappings-autogeneration, Lines: 22, Source file: parse.py
Example 14: run_compression_distance
def run_compression_distance():
    logname = "generate_feature_compression_distance_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    obs_fields_list = []
    target_fields_list = []
    obs_fields_list.append(["search_term", "search_term_product_name", "search_term_alt", "search_term_auto_corrected"])
    target_fields_list.append(["product_title", "product_title_product_name", "product_description", "product_attribute", "product_brand", "product_color"])
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        param_list = []
        pf = PairwiseFeatureWrapper(CompressionDistance, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger)
        pf.go()
Developer: amsqr, Project: Kaggle_HomeDepot, Lines: 13, Source file: feature_distance.py
Example 15: main
def main():
    logname = "generate_feature_basic_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    ## basic
    generators = [DocId, DocLen, DocFreq, DocEntropy, DigitCount, DigitRatio]
    obs_fields = ["search_term", "product_title", "product_description",
                  "product_attribute", "product_brand", "product_color"]
    for generator in generators:
        param_list = []
        sf = StandaloneFeatureWrapper(generator, dfAll, obs_fields, param_list, config.FEAT_DIR, logger)
        sf.go()
    ## for product_uid
    generators = [DocIdEcho, DocFreq, ProductUidDummy1, ProductUidDummy2, ProductUidDummy3]
    obs_fields = ["product_uid"]
    for generator in generators:
        param_list = []
        sf = StandaloneFeatureWrapper(generator, dfAll, obs_fields, param_list, config.FEAT_DIR, logger)
        sf.go()
    ## unique count
    generators = [UniqueCount_Ngram, UniqueRatio_Ngram]
    obs_fields = ["search_term", "product_title", "product_description",
                  "product_attribute", "product_brand", "product_color"]
    ngrams = [1,2,3]
    for generator in generators:
        for ngram in ngrams:
            param_list = [ngram]
            sf = StandaloneFeatureWrapper(generator, dfAll, obs_fields, param_list, config.FEAT_DIR, logger)
            sf.go()
    ## for product_attribute_list
    generators = [
        AttrCount,
        AttrBulletCount,
        AttrBulletRatio,
        AttrNonBulletCount,
        AttrNonBulletRatio,
        AttrHasProductHeight,
        AttrHasProductWidth,
        AttrHasProductLength,
        AttrHasProductDepth,
        AttrHasIndoorOutdoor,
    ]
    obs_fields = ["product_attribute_list"]
    for generator in generators:
        param_list = []
        sf = StandaloneFeatureWrapper(generator, dfAll, obs_fields, param_list, config.FEAT_DIR, logger)
        sf.go()
Developer: amsqr, Project: Kaggle_HomeDepot, Lines: 51, Source file: feature_basic.py
Example 16: getILL
def getILL(lang, target):
    print "[%s]: generate ILL dict from language %s to language %s" % (time_utils._timestamp(), lang, target)
    infile = open(config.ILL[lang])
    prefix1 = config.LANG_PREFIX[lang]
    prefix2 = config.LANG_PREFIX[target]
    len1 = len(prefix1)
    len2 = len(prefix2)
    linkDict = {}
    for line in infile.readlines():
        if line[0] != "<":
            continue
        row = line.split()
        lang1 = row[0][1:-1]
        lang2 = row[2][1:-1]
        if prefix1 not in lang1:
            continue
        if prefix2 not in lang2:
            continue
        lang1 = lang1[len1:]
        lang2 = lang2[len2:]
        linkDict[lang1] = lang2
    print "%d links in total" % len(linkDict)
    pkl_utils._save(config.ILL_DICT["%s2%s" % (lang, target)], linkDict)
    print "[%s]: generation complete" % time_utils._timestamp()
Developer: dbpedia, Project: mappings-autogeneration, Lines: 24, Source file: parse.py
Example 17: main
def main():
    print "[%s]: generate ontology hierarchy tree" % (time_utils._timestamp())
    G = g.Graph()
    G.parse(config.ONTOLOGY, format="n3")
    # note: the prefix is named "rr" here but actually binds the RDFS namespace
    q = '''
        PREFIX rr: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT ?child ?parent
        WHERE {
            ?child rr:subClassOf ?parent .
        }'''
    results = G.query(q)
    ontologyDict = {}
    for row in results:
        child = str(row[0])
        parent = str(row[1])
        if parent in ontologyDict:
            ontologyDict[parent].append(child)
        else:
            ontologyDict[parent] = [child,]
    pkl_utils._save(config.ONTOLOGY_TREE, ontologyDict)
    print "[%s]: generation complete" % time_utils._timestamp()
Developer: dbpedia, Project: mappings-autogeneration, Lines: 24, Source file: ontology.py
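The saved ontologyDict maps each parent class URI to its direct subclasses, so a depth-first walk over it is straightforward. A minimal sketch; the owl:Thing root URI is an assumption about the DBpedia ontology, not something this snippet guarantees:

    def walk(tree, node, depth=0):
        # print the hierarchy rooted at node, indenting one level per depth
        print "%s%s" % ("  " * depth, node)
        for child in tree.get(node, []):
            walk(tree, child, depth + 1)

    walk(ontologyDict, "http://www.w3.org/2002/07/owl#Thing")  # assumed root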
Example 18: parse_args
def parse_args(parser):
    parser.add_option("-l", "--level", default=1, type="int",
        dest="feature_level", help="feature level, e.g., 1, 2, 3")
    parser.add_option("-c", "--config", default="feature_conf", type="string",
        dest="feature_conf", help="feature config name")
    parser.add_option("-n", "--name", default="basic%s" % time_utils._timestamp(),
        type="string", dest="feature_name", help="feature name")
    parser.add_option("-s", "--suffix", default=".pkl", type="string",
        dest="feature_suffix", help="feature suffix")
    parser.add_option("-m", "--meta_config", default="feature_conf_meta",
        type="string", dest="feature_conf_meta", help="meta feature config name")
    parser.add_option("-t", "--threshold", default=0.0, type="float",
        dest="corr_threshold", help="correlation threshold for dropping features")
    (options, args) = parser.parse_args()
    return options, args
Developer: Anhmike, Project: Kaggle_HomeDepot, Lines: 15, Source file: feature_combiner.py
Example 19: run_char_dist_sim
def run_char_dist_sim():
    logname = "generate_feature_char_dist_sim_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfAll.drop(["product_attribute_list"], inplace=True, axis=1)
    generators = [CharDistribution_Ratio, CharDistribution_CosineSim, CharDistribution_KL]
    obs_fields_list = []
    target_fields_list = []
    obs_fields_list.append(["search_term", "search_term_alt", "search_term_auto_corrected"])
    target_fields_list.append(["product_title", "product_description", "product_attribute"])
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        for generator in generators:
            param_list = []
            pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger)
            pf.go()
Developer: amsqr, Project: Kaggle_HomeDepot, Lines: 16, Source file: feature_vector_space.py
Example 20: main
def main():
    logname = "generate_feature_doc2vec_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    #### NOTE: use data BEFORE STEMMING
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED)
    doc2vec_model_dirs = []
    model_prefixes = []
    ## doc2vec model trained with Homedepot dataset: brand/color/obs/title/description
    doc2vec_model_dirs.append(config.DOC2VEC_MODEL_DIR + "/Homedepot-doc2vec-D%d-min_count%d.model" % (config.EMBEDDING_DIM, config.EMBEDDING_MIN_COUNT))
    model_prefixes.append("Homedepot")
    for doc2vec_model_dir, model_prefix in zip(doc2vec_model_dirs, model_prefixes):
        ## load model
        try:
            if ".bin" in doc2vec_model_dir:
                doc2vec_model = gensim.models.Doc2Vec.load_word2vec_format(doc2vec_model_dir, binary=True)
            # elif (rather than a second if) keeps a .bin path from also
            # falling through to the generic loader in the else branch
            elif ".txt" in doc2vec_model_dir:
                doc2vec_model = gensim.models.Doc2Vec.load_word2vec_format(doc2vec_model_dir, binary=False)
            else:
                doc2vec_model = gensim.models.Doc2Vec.load(doc2vec_model_dir)
            doc2vec_model_sent_label = pkl_utils._load(doc2vec_model_dir + ".sent_label")
        except:
            # skip models that fail to load
            continue
        # ## standalone (not used in model building)
        # obs_fields = ["search_term", "search_term_alt", "product_title", "product_description", "product_attribute"]
        # generator = Doc2Vec_Vector
        # param_list = [doc2vec_model, doc2vec_model_sent_label, model_prefix]
        # sf = StandaloneFeatureWrapper(generator, dfAll, obs_fields, param_list, config.FEAT_DIR, logger)
        # sf.go()
        ## pairwise
        generators = [
            Doc2Vec_CosineSim,
            Doc2Vec_RMSE,
            Doc2Vec_Vdiff,
        ]
        obs_fields_list = []
        target_fields_list = []
        obs_fields_list.append(["search_term", "search_term_alt"])
        target_fields_list.append(["product_title", "product_description", "product_attribute", "product_brand", "product_color"])
        for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
            for generator in generators:
                param_list = [doc2vec_model, doc2vec_model_sent_label, model_prefix]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger)
                pf.go()
Developer: MrSnark, Project: Kaggle_HomeDepot, Lines: 46, Source file: feature_doc2vec.py
Note: The utils.time_utils._timestamp examples on this page were collected by 纯净天空 from open-source projects hosted on GitHub, MSDocs, and similar code and documentation platforms. The snippets were selected from code contributed by various open-source authors; copyright remains with the original authors, and any use or redistribution must follow the corresponding project's license. Do not repost without permission.