本文整理汇总了 Python 中 utils.logging_utils._get_logger 函数的典型用法代码示例。如果您正苦于以下问题：Python _get_logger 函数的具体用法？Python _get_logger 怎么用？Python _get_logger 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 _get_logger 函数的 20 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。
示例1: __init__
def __init__(self, model_folder, model_list, subm_prefix,
             weight_opt_max_evals=10, w_min=-1., w_max=1.,
             inst_subsample=0.5, inst_subsample_replacement=False,
             inst_splitter=None,
             model_subsample=1.0, model_subsample_replacement=True,
             bagging_size=10, init_top_k=5, epsilon=0.00001,
             multiprocessing=False, multiprocessing_num_cores=1,
             enable_extreme=True, random_seed=0):
    """Record the ensemble-selection settings and open a timestamped log.

    Every argument is stored on the instance under the same name.

    Raises:
        AssertionError: if ``inst_subsample`` is not in (0, 1], or if
            ``model_subsample`` is not positive, or if it is neither of
            exact type ``int`` nor <= 1.
    """
    # Sanity checks on the two subsampling settings.
    assert 0 < inst_subsample <= 1.
    assert model_subsample > 0
    # An exact int is accepted as-is; anything else must be <= 1.
    assert (type(model_subsample) is int) or (model_subsample <= 1.)

    # Where the candidate models come from.
    self.model_folder = model_folder
    self.model_list = model_list
    self.subm_prefix = subm_prefix
    self.n_models = len(self.model_list)

    # Weight search settings.
    self.weight_opt_max_evals = weight_opt_max_evals
    self.w_min = w_min
    self.w_max = w_max

    # Instance / model subsampling settings.
    self.inst_subsample = inst_subsample
    self.inst_subsample_replacement = inst_subsample_replacement
    self.inst_splitter = inst_splitter
    self.model_subsample = model_subsample
    self.model_subsample_replacement = model_subsample_replacement

    # Remaining settings.
    self.bagging_size = bagging_size
    self.init_top_k = init_top_k
    self.epsilon = epsilon
    self.multiprocessing = multiprocessing
    self.multiprocessing_num_cores = multiprocessing_num_cores
    self.enable_extreme = enable_extreme
    self.random_seed = random_seed

    # One log file per instance, named with the current timestamp.
    log_file = "ensemble_selection_%s.log" % time_utils._timestamp()
    self.logger = logging_utils._get_logger(config.LOG_DIR, log_file)
开发者ID:Anhmike,项目名称:Kaggle_HomeDepot,代码行数:33,代码来源:extreme_ensemble_selection.py
示例2: main
def main():
    """Run the intersect-position generators for every field pairing.

    Each (generator, ngram) combination is wrapped in a
    PairwiseFeatureWrapper and executed via ``go()``, once with the query
    fields as observations and once with the roles swapped.
    """
    log_file = "generate_feature_intersect_position_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    all_ngrams = [1, 2, 3, 12, 123]
    ngrams = all_ngrams[:3]
    aggregation_mode = ["mean", "std", "max", "min", "median"]

    query_fields = ("search_term", "search_term_product_name",
                    "search_term_alt", "search_term_auto_corrected")
    doc_fields = ("product_title", "product_title_product_name",
                  "product_description", "product_attribute",
                  "product_brand", "product_color")

    # Only the slice [1:2] of the document fields is used in this script.
    field_pairs = [
        ## query in document
        (list(query_fields), list(doc_fields)[1:2]),
        ## document in query
        (list(doc_fields)[1:2], list(query_fields)),
    ]

    for obs_fields, target_fields in field_pairs:
        for generator in (IntersectPosition_Ngram, IntersectNormPosition_Ngram):
            for ngram in ngrams:
                wrapper = PairwiseFeatureWrapper(
                    generator, dfAll, obs_fields, target_fields,
                    [ngram, aggregation_mode], config.FEAT_DIR, logger)
                wrapper.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:25,代码来源:feature_intersect_position.py
示例3: run_tsne_lsa_ngram
def run_tsne_lsa_ngram():
    """Run the TSNE/LSA n-gram generators, standalone and pairwise.

    First the word and char generators are run field-by-field through
    StandaloneFeatureWrapper, then the pair generator is run through
    PairwiseFeatureWrapper. Both wrappers are called with
    ``force_corr=True``.
    """
    logname = "generate_feature_tsne_lsa_ngram_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfAll.drop(["product_attribute_list"], inplace=True, axis=1)

    ## standalone
    generators = [TSNE_LSA_Word_Ngram, TSNE_LSA_Char_Ngram]
    # One n-gram list per generator above (word: 1-3, char: 4).
    # A previous value was assigned and immediately overwritten, so only
    # this assignment ever took effect; the dead store has been removed.
    ngrams_list = [[1, 2, 3], [4]]
    obs_fields = ["search_term", "search_term_alt", "search_term_auto_corrected",
                  "product_title", "product_description"]
    for generator, ngrams in zip(generators, ngrams_list):
        for ngram in ngrams:
            param_list = [ngram, config.SVD_DIM, config.SVD_N_ITER]
            sf = StandaloneFeatureWrapper(generator, dfAll, obs_fields, param_list,
                                          config.FEAT_DIR, logger, force_corr=True)
            sf.go()

    ## pairwise
    generators = [TSNE_LSA_Word_Ngram_Pair]
    ngrams = [1, 2]
    obs_fields_list = [["search_term", "search_term_alt", "search_term_auto_corrected"]]
    target_fields_list = [["product_title", "product_description"]]
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        for ngram in ngrams:
            for generator in generators:
                param_list = [ngram, config.SVD_DIM, config.SVD_N_ITER]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields,
                                            param_list, config.FEAT_DIR, logger,
                                            force_corr=True)
                pf.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:28,代码来源:feature_vector_space.py
示例4: main
def main():
    """Run GroupRelevance once per CV split and once on the full training rows.

    For split ``i`` the training rows are restricted to that split's first
    index array and the wrapper receives ``<FEAT_DIR>/Run<i+1>``; the final
    run uses all training rows and ``<FEAT_DIR>/All``.
    """
    log_file = "generate_feature_group_relevance_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfTrain = dfAll.iloc[:TRAIN_SIZE].copy()

    ## run python3 splitter.py first
    split = pkl_utils._load("%s/splits_level1.pkl" % config.SPLIT_DIR)

    def _run(train_frame, out_dir):
        # Fresh lists on every call, as in the original per-run assignments.
        obs_fields = ["search_term", "product_title"][1:]
        aggregation_mode = ["mean", "std", "max", "min", "median", "size"]
        params = [dfAll["id"], train_frame, aggregation_mode]
        StandaloneFeatureWrapper(GroupRelevance, dfAll, obs_fields, params,
                                 out_dir, logger).go()

    ## for cv
    for run in range(len(split)):
        fold = split[run]
        train_idx, _valid_idx = fold[0], fold[1]  # validation indices are not used here
        _run(dfTrain.iloc[train_idx].copy(), "%s/Run%d" % (config.FEAT_DIR, run + 1))

    ## for all
    _run(dfTrain, "%s/All" % (config.FEAT_DIR))
开发者ID:Anhmike,项目名称:Kaggle_HomeDepot,代码行数:29,代码来源:feature_group_relevance.py
示例5: run_count
def run_count():
    """Run the first/last intersect count and ratio generators.

    Every (generator, ngram) combination is executed through
    PairwiseFeatureWrapper, once with the query fields as observations and
    once with the document fields as observations.
    """
    log_file = "generate_feature_first_last_ngram_count_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    query_fields = ("search_term", "search_term_product_name",
                    "search_term_alt", "search_term_auto_corrected")
    doc_fields = ("product_title", "product_title_product_name",
                  "product_description", "product_attribute",
                  "product_brand", "product_color")
    field_pairs = [
        ## query in document
        (list(query_fields), list(doc_fields)),
        ## document in query
        (list(doc_fields), list(query_fields)),
    ]

    all_ngrams = [1, 2, 3, 12, 123]
    ngrams = all_ngrams[:3]
    generators = [
        FirstIntersectCount_Ngram,
        LastIntersectCount_Ngram,
        FirstIntersectRatio_Ngram,
        LastIntersectRatio_Ngram,
    ]

    for obs_fields, target_fields in field_pairs:
        for generator in generators:
            for ngram in ngrams:
                wrapper = PairwiseFeatureWrapper(
                    generator, dfAll, obs_fields, target_fields,
                    [ngram], config.FEAT_DIR, logger)
                wrapper.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:27,代码来源:feature_first_last_ngram.py
示例6: main
def main(options):
    """Build a TaskOptimizer from the parsed options and run it.

    Args:
        options: object exposing ``feature_name``, ``learner_name``,
            ``task_mode``, ``max_evals`` and ``refit_once`` attributes.
    """
    # The scraped copy of this format string had been mangled by web
    # e-mail obfuscation ("[email protected]" in place of "<word>@");
    # restored so the log name carries the feature and learner names.
    # NOTE(review): "Feat"/"Learner" prefixes are reconstructed - confirm
    # against the upstream task.py.
    logname = "[Feat@%s]_[Learner@%s]_hyperopt_%s.log" % (
        options.feature_name, options.learner_name, time_utils._timestamp())
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    optimizer = TaskOptimizer(options.task_mode, options.learner_name,
                              options.feature_name, logger, options.max_evals,
                              verbose=True, refit_once=options.refit_once)
    optimizer.run()
开发者ID:yitang,项目名称:Kaggle_HomeDepot,代码行数:7,代码来源:task.py
示例7: main
def main(which):
    """Run the selected co-occurrence statistic generators.

    Args:
        which: comma-separated generator keys; recognised keys are
            ``tf``, ``norm_tf``, ``tfidf``, ``norm_tfidf`` and ``bm25``.
            Unrecognised keys are ignored.
    """
    log_file = "generate_feature_stat_cooc_tfidf_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    # Key -> generator class; requested order (and duplicates) are preserved.
    generator_by_key = {
        "tf": StatCoocTF_Ngram,
        "norm_tf": StatCoocNormTF_Ngram,
        "tfidf": StatCoocTFIDF_Ngram,
        "norm_tfidf": StatCoocNormTFIDF_Ngram,
        "bm25": StatCoocBM25_Ngram,
    }
    generators = [generator_by_key[key] for key in which.split(",")
                  if key in generator_by_key]

    # ---- Part 1: full field lists, both directions ----
    query_fields = ("search_term", "search_term_alt", "search_term_auto_corrected")
    doc_fields = ("product_title", "product_title_product_name",
                  "product_description", "product_attribute",
                  "product_brand", "product_color")
    field_pairs = [
        ## query in document
        (list(query_fields), list(doc_fields)),
        ## document in query
        (list(doc_fields), list(query_fields)),
    ]
    all_ngrams = [1, 2, 3, 12, 123]
    ngrams = all_ngrams[:3]
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    for obs_fields, target_fields in field_pairs:
        for generator in generators:
            for ngram in ngrams:
                wrapper = PairwiseFeatureWrapper(
                    generator, dfAll, obs_fields, target_fields,
                    [ngram, aggregation_mode], config.FEAT_DIR, logger)
                wrapper.go()

    # ---- Part 2: product-name fields only ----
    ## query in document
    field_pairs = [(["search_term_product_name"], ["product_title_product_name"])]
    ngrams = [1, 2]
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    for obs_fields, target_fields in field_pairs:
        for generator in generators:
            for ngram in ngrams:
                # since product_name is of length 2, it makes no difference
                # for various aggregation as there is only one item
                mode = "mean" if ngram == 2 else aggregation_mode
                wrapper = PairwiseFeatureWrapper(
                    generator, dfAll, obs_fields, target_fields,
                    [ngram, mode], config.FEAT_DIR, logger)
                wrapper.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:55,代码来源:feature_stat_cooc_tfidf.py
示例8: __init__
def __init__(self, feature_dict, feature_name, feature_suffix=".pkl", corr_threshold=0):
    """Store the combiner settings and open a timestamped log.

    Args:
        feature_dict: stored as ``self.feature_dict``.
        feature_name: stored as ``self.feature_name`` and embedded in the
            log file name.
        feature_suffix: stored as ``self.feature_suffix``.
        corr_threshold: stored as ``self.corr_threshold``.
    """
    # Caller-supplied settings.
    self.feature_dict = feature_dict
    self.feature_name = feature_name
    self.feature_suffix = feature_suffix
    self.corr_threshold = corr_threshold

    # Internal state, empty until populated elsewhere.
    self.feature_names_basic = []
    self.feature_names_cv = []
    self.basic_only = 0

    log_file = "feature_combiner_%s_%s.log" % (feature_name, time_utils._timestamp())
    self.logger = logging_utils._get_logger(config.LOG_DIR, log_file)

    # Neither name is a parameter here, so both resolve from the enclosing
    # module scope.
    self.splitter = splitter_level1
    self.n_iter = n_iter
开发者ID:MrSnark,项目名称:Kaggle_HomeDepot,代码行数:12,代码来源:feature_combiner.py
示例9: run_compression_distance
def run_compression_distance():
    """Run CompressionDistance between the query fields and document fields."""
    log_file = "generate_feature_compression_distance_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    field_pairs = [
        (
            ["search_term", "search_term_product_name",
             "search_term_alt", "search_term_auto_corrected"],
            ["product_title", "product_title_product_name",
             "product_description", "product_attribute",
             "product_brand", "product_color"],
        ),
    ]
    for obs_fields, target_fields in field_pairs:
        # This generator takes no extra parameters.
        wrapper = PairwiseFeatureWrapper(
            CompressionDistance, dfAll, obs_fields, target_fields,
            [], config.FEAT_DIR, logger)
        wrapper.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:13,代码来源:feature_distance.py
示例10: main
def main():
    """Run the basic standalone generators in four groups.

    Groups, in order: text-field generators, ``product_uid`` generators,
    unique n-gram count/ratio generators, and ``product_attribute_list``
    generators. Each one is executed through StandaloneFeatureWrapper.
    """
    log_file = "generate_feature_basic_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    def _run(generator, obs_fields, params):
        StandaloneFeatureWrapper(generator, dfAll, obs_fields, params,
                                 config.FEAT_DIR, logger).go()

    ## basic
    text_fields = ["search_term", "product_title", "product_description",
                   "product_attribute", "product_brand", "product_color"]
    for generator in [DocId, DocLen, DocFreq, DocEntropy, DigitCount, DigitRatio]:
        _run(generator, text_fields, [])

    ## for product_uid
    uid_fields = ["product_uid"]
    for generator in [DocIdEcho, DocFreq, ProductUidDummy1,
                      ProductUidDummy2, ProductUidDummy3]:
        _run(generator, uid_fields, [])

    ## unique count
    text_fields = ["search_term", "product_title", "product_description",
                   "product_attribute", "product_brand", "product_color"]
    for generator in [UniqueCount_Ngram, UniqueRatio_Ngram]:
        for ngram in [1, 2, 3]:
            _run(generator, text_fields, [ngram])

    ## for product_attribute_list
    attr_fields = ["product_attribute_list"]
    attr_generators = [
        AttrCount,
        AttrBulletCount,
        AttrBulletRatio,
        AttrNonBulletCount,
        AttrNonBulletRatio,
        AttrHasProductHeight,
        AttrHasProductWidth,
        AttrHasProductLength,
        AttrHasProductDepth,
        AttrHasIndoorOutdoor,
    ]
    for generator in attr_generators:
        _run(generator, attr_fields, [])
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:51,代码来源:feature_basic.py
示例11: run_char_dist_sim
def run_char_dist_sim():
    """Run the three CharDistribution generators between query and document fields.

    The ``product_attribute_list`` column is dropped from the loaded frame
    before any generator runs.
    """
    log_file = "generate_feature_char_dist_sim_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfAll.drop(["product_attribute_list"], inplace=True, axis=1)

    field_pairs = [
        (
            ["search_term", "search_term_alt", "search_term_auto_corrected"],
            ["product_title", "product_description", "product_attribute"],
        ),
    ]
    generators = [CharDistribution_Ratio, CharDistribution_CosineSim, CharDistribution_KL]
    for obs_fields, target_fields in field_pairs:
        for generator in generators:
            wrapper = PairwiseFeatureWrapper(
                generator, dfAll, obs_fields, target_fields,
                [], config.FEAT_DIR, logger)
            wrapper.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:16,代码来源:feature_vector_space.py
示例12: main
def main():
    """Run the pairwise Doc2Vec generators for every configured model.

    For each model path the model is loaded (word2vec binary format for
    ``.bin``, word2vec text format for ``.txt``, native gensim format
    otherwise). A model that fails to load is logged and skipped. The
    standalone ``Doc2Vec_Vector`` features are intentionally not generated
    here (not used in model building).
    """
    logname = "generate_feature_doc2vec_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    #### NOTE: use data BEFORE STEMMING
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED)

    doc2vec_model_dirs = []
    model_prefixes = []
    ## doc2vec model trained with Homedepot dataset: brand/color/obs/title/description
    doc2vec_model_dirs.append(
        config.DOC2VEC_MODEL_DIR
        + "/Homedepot-doc2vec-D%d-min_count%d.model" % (config.EMBEDDING_DIM, config.EMBEDDING_MIN_COUNT))
    model_prefixes.append("Homedepot")

    for doc2vec_model_dir, model_prefix in zip(doc2vec_model_dirs, model_prefixes):
        # Reset per model so a label mapping loaded for a previous model is
        # never silently reused.
        # NOTE(review): word2vec-format models (.bin/.txt) get no sentence
        # labels; confirm the generators accept None in that case.
        doc2vec_model_sent_label = None
        ## load model
        try:
            if ".bin" in doc2vec_model_dir:
                doc2vec_model = gensim.models.Doc2Vec.load_word2vec_format(doc2vec_model_dir, binary=True)
            # Must be `elif`: with a plain `if`, a ".bin" path fell through
            # to the `else` below and was re-loaded with Doc2Vec.load().
            elif ".txt" in doc2vec_model_dir:
                doc2vec_model = gensim.models.Doc2Vec.load_word2vec_format(doc2vec_model_dir, binary=False)
            else:
                doc2vec_model = gensim.models.Doc2Vec.load(doc2vec_model_dir)
                doc2vec_model_sent_label = pkl_utils._load(doc2vec_model_dir + ".sent_label")
        except Exception as e:
            # Best effort: skip a model that cannot be loaded, but say so
            # instead of swallowing the failure silently.
            logger.info("Skip doc2vec model %s: %s" % (doc2vec_model_dir, e))
            continue

        ## pairwise
        generators = [
            Doc2Vec_CosineSim,
            Doc2Vec_RMSE,
            Doc2Vec_Vdiff,
        ]
        obs_fields_list = []
        target_fields_list = []
        obs_fields_list.append(["search_term", "search_term_alt"])
        target_fields_list.append(["product_title", "product_description", "product_attribute",
                                   "product_brand", "product_color"])
        for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
            for generator in generators:
                param_list = [doc2vec_model, doc2vec_model_sent_label, model_prefix]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields,
                                            param_list, config.FEAT_DIR, logger)
                pf.go()
开发者ID:MrSnark,项目名称:Kaggle_HomeDepot,代码行数:46,代码来源:feature_doc2vec.py
示例13: run_ngram_jaccard
def run_ngram_jaccard():
    """Run the Jaccard and Dice n-gram generators for n-grams 1 to 3.

    Only the first two query fields are used as observations.
    """
    log_file = "generate_feature_ngram_jaccard_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    query_fields = ["search_term", "search_term_product_name",
                    "search_term_alt", "search_term_auto_corrected"]
    doc_fields = ["product_title", "product_title_product_name",
                  "product_description", "product_attribute",
                  "product_brand", "product_color"]
    field_pairs = [(query_fields[:2], doc_fields)]

    all_ngrams = [1, 2, 3, 12, 123]
    ngrams = all_ngrams[:3]

    for obs_fields, target_fields in field_pairs:
        for generator in [JaccardCoef_Ngram, DiceDistance_Ngram]:
            for ngram in ngrams:
                wrapper = PairwiseFeatureWrapper(
                    generator, dfAll, obs_fields, target_fields,
                    [ngram], config.FEAT_DIR, logger)
                wrapper.go()
开发者ID:Anhmike,项目名称:Kaggle_HomeDepot,代码行数:17,代码来源:feature_distance.py
示例14: run_tfidf_ngram_cosinesim
def run_tfidf_ngram_cosinesim():
    """Run the TF-IDF word and char n-gram cosine-similarity generators.

    The ``product_attribute_list`` column is dropped from the loaded frame
    first. Each generator is paired with its own n-gram list and executed
    through PairwiseFeatureWrapper.
    """
    logname = "generate_feature_tfidf_ngram_cosinesim_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfAll.drop(["product_attribute_list"], inplace=True, axis=1)

    generators = [TFIDF_Word_Ngram_CosineSim, TFIDF_Char_Ngram_CosineSim]
    # One n-gram list per generator above (word: 1-3, char: 4).
    # A previous value was assigned and immediately overwritten, so only
    # this assignment ever took effect; the dead store has been removed.
    ngrams_list = [[1, 2, 3], [4]]

    obs_fields_list = [["search_term", "search_term_alt", "search_term_auto_corrected"]]
    target_fields_list = [["product_title", "product_description", "product_attribute"]]
    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        for generator, ngrams in zip(generators, ngrams_list):
            for ngram in ngrams:
                param_list = [ngram]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields,
                                            param_list, config.FEAT_DIR, logger)
                pf.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:19,代码来源:feature_vector_space.py
示例15: run_edit_distance
def run_edit_distance():
    """Run EditDistance and the n-gram EditDistance_Ngram generators.

    Only ``search_term_product_name`` (slice ``[1:2]`` of the query fields)
    is used as the observation field. The n-gram variant receives two
    aggregation-mode lists in its parameter list.
    """
    logname = "generate_feature_edit_distance_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    obs_fields_list = []
    target_fields_list = []
    obs_fields_list.append(["search_term", "search_term_product_name",
                            "search_term_alt", "search_term_auto_corrected"][1:2])
    target_fields_list.append(["product_title", "product_title_product_name",
                               "product_description", "product_attribute",
                               "product_brand", "product_color"])
    ngrams = [1, 2, 3, 12, 123][:3]
    aggregation_mode_prev = ["mean", "max", "min", "median"]
    aggregation_mode = ["mean", "std", "max", "min", "median"]

    for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
        # The plain EditDistance wrapper used to be constructed and then
        # discarded without go(), so that feature was never run.
        # NOTE(review): confirm the missing go() was not a deliberate disable.
        param_list = []
        pf = PairwiseFeatureWrapper(EditDistance, dfAll, obs_fields, target_fields,
                                    param_list, config.FEAT_DIR, logger)
        pf.go()
        for ngram in ngrams:
            param_list = [ngram, aggregation_mode_prev, aggregation_mode]
            pf = PairwiseFeatureWrapper(EditDistance_Ngram, dfAll, obs_fields, target_fields,
                                        param_list, config.FEAT_DIR, logger)
            pf.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:19,代码来源:feature_distance.py
示例16: main
def main():
    """Run QueryQuality on every pair of search-term variants, then IsInGoogleDict.

    Variants are collected from the raw, lemmatized and stemmed data
    (plus the product-name and, when present, auto-corrected columns).
    Each pair's result is saved under ``config.FEAT_DIR`` and its
    correlation with the training relevance is logged.
    """
    log_file = "generate_feature_query_quality_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)

    obs_corpus = []
    query_suffix = []

    def _add(frame, column, suffix):
        obs_corpus.append(frame[column].values)
        query_suffix.append(suffix)

    # raw
    dfAll = pkl_utils._load(config.ALL_DATA_RAW)
    _add(dfAll, "search_term", "raw")
    # after processing
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED)
    _add(dfAll, "search_term", "lemmatized")
    # after extracting product_name in search_term
    _add(dfAll, "search_term_product_name", "product_name")
    if "search_term_auto_corrected" in dfAll.columns:
        # after auto correction
        _add(dfAll, "search_term_auto_corrected", "corrected")
    # after stemming
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    _add(dfAll, "search_term", "stemmed")

    y_train = dfAll["relevance"].values[:TRAIN_SIZE]

    # Every unordered pair (first < second) of the collected variants.
    n_variants = len(query_suffix)
    for first in range(n_variants - 1):
        for second in range(first + 1, n_variants):
            ext = QueryQuality(obs_corpus[first], obs_corpus[second])
            x = ext.transform()
            dim = 1
            fname = "%s_%s_x_%s_%dD" % (ext._get_feat_name(), query_suffix[first],
                                        query_suffix[second], dim)
            out_path = os.path.join(config.FEAT_DIR, fname + config.FEAT_FILE_SUFFIX)
            pkl_utils._save(out_path, x)
            corr = np_utils._corr(x[:TRAIN_SIZE], y_train)
            logger.info("%s (%dD): corr = %.6f" % (fname, dim, corr))

    # raw
    dfAll = pkl_utils._load(config.ALL_DATA_RAW)
    wrapper = StandaloneFeatureWrapper(IsInGoogleDict, dfAll, ["search_term"], [],
                                       config.FEAT_DIR, logger)
    wrapper.go()
开发者ID:MrSnark,项目名称:Kaggle_HomeDepot,代码行数:43,代码来源:feature_query_quality.py
示例17: run_lsa_ngram_cooc
def run_lsa_ngram_cooc():
    """Run LSA_Word_Ngram_Cooc for every (obs n-gram, target n-gram) in {1, 2} x {1, 2}.

    The ``product_attribute_list`` column is dropped from the loaded frame
    before any generator runs.
    """
    log_file = "generate_feature_lsa_ngram_cooc_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfAll.drop(["product_attribute_list"], inplace=True, axis=1)

    field_pairs = [
        (
            ["search_term", "search_term_alt", "search_term_auto_corrected"],
            ["product_title", "product_description"],
        ),
    ]
    obs_ngrams = [1, 2]
    target_ngrams = [1, 2]

    for obs_fields, target_fields in field_pairs:
        for obs_ngram in obs_ngrams:
            for target_ngram in target_ngrams:
                for generator in [LSA_Word_Ngram_Cooc]:
                    params = [obs_ngram, target_ngram, config.SVD_DIM, config.SVD_N_ITER]
                    wrapper = PairwiseFeatureWrapper(
                        generator, dfAll, obs_fields, target_fields,
                        params, config.FEAT_DIR, logger)
                    wrapper.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:20,代码来源:feature_vector_space.py
示例18: main
def main():
    """Run GroupRelevance_Ngram_Jaccard per CV split and on all training rows.

    For split ``i`` the training rows are restricted to that split's first
    index array and the wrapper receives ``<FEAT_DIR>/Run<i+1>``; the final
    pass uses all training rows and ``<FEAT_DIR>/All``. Within each pass
    every (target field, relevance, ngram) combination is run.
    """
    log_file = "generate_feature_group_distance_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfTrain = dfAll.iloc[:TRAIN_SIZE].copy()

    ## run python3 splitter.py first
    split = pkl_utils._load("%s/splits_level1.pkl" % config.SPLIT_DIR)

    # Not referenced below; only `relevances` is iterated.
    relevances_complete = [1, 1.25, 1.33, 1.5, 1.67, 1.75, 2, 2.25, 2.33, 2.5, 2.67, 2.75, 3]
    relevances = [1, 1.33, 1.67, 2, 2.33, 2.67, 3]
    ngrams = [1]
    obs_fields = ["search_term"]
    target_fields = ["product_title", "product_description"]
    aggregation_mode = ["mean", "std", "max", "min", "median"]

    def _run_all(train_frame, out_dir):
        for target_field in target_fields:
            for relevance in relevances:
                for ngram in ngrams:
                    params = [dfAll["id"], train_frame, target_field,
                              relevance, ngram, aggregation_mode]
                    wrapper = PairwiseFeatureWrapper(
                        GroupRelevance_Ngram_Jaccard, dfAll, obs_fields,
                        [target_field], params, out_dir, logger)
                    wrapper.go()

    ## for cv
    for run in range(len(split)):
        fold = split[run]
        train_idx, _valid_idx = fold[0], fold[1]  # validation indices are not used here
        _run_all(dfTrain.iloc[train_idx].copy(), "%s/Run%d" % (config.FEAT_DIR, run + 1))

    ## for all
    _run_all(dfTrain, "%s/All" % (config.FEAT_DIR))
开发者ID:MrSnark,项目名称:Kaggle_HomeDepot,代码行数:38,代码来源:feature_group_distance.py
示例19: main
def main():
    """Run the three WordNet similarity generators between query and document fields.

    Each generator receives two aggregation-mode lists in its parameter list.
    """
    log_file = "generate_feature_wordnet_similarity_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    #### NOTE: use data BEFORE STEMMING
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED)

    field_pairs = [
        (
            ["search_term", "search_term_alt", "search_term_auto_corrected"],
            ["product_title", "product_description", "product_attribute"],
        ),
    ]
    # double aggregation
    aggregation_mode_prev = ["mean", "max", "min", "median"]
    aggregation_mode = ["mean", "std", "max", "min", "median"]

    generators = [
        WordNet_Path_Similarity,
        WordNet_Lch_Similarity,
        WordNet_Wup_Similarity,
    ]
    for obs_fields, target_fields in field_pairs:
        for generator in generators:
            wrapper = PairwiseFeatureWrapper(
                generator, dfAll, obs_fields, target_fields,
                [aggregation_mode_prev, aggregation_mode], config.FEAT_DIR, logger)
            wrapper.go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:23,代码来源:feature_wordnet_similarity.py
示例20: main
def main():
    """Run the match generators in two groups.

    The first group pairs the query fields with the document text fields;
    the second pairs three query fields with ``product_attribute_list``.
    Each generator is executed through PairwiseFeatureWrapper with an
    empty parameter list.
    """
    log_file = "generate_feature_match_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    def _run_group(generators, field_pairs):
        for obs_fields, target_fields in field_pairs:
            for generator in generators:
                wrapper = PairwiseFeatureWrapper(
                    generator, dfAll, obs_fields, target_fields,
                    [], config.FEAT_DIR, logger)
                wrapper.go()

    _run_group(
        [
            MatchQueryCount,
            MatchQueryRatio,
            LongestMatchSize,
            LongestMatchRatio,
        ],
        [
            (
                ["search_term", "search_term_product_name",
                 "search_term_alt", "search_term_auto_corrected"],
                ["product_title", "product_title_product_name",
                 "product_description", "product_attribute",
                 "product_brand", "product_color"],
            ),
        ],
    )

    # product_attribute_list
    _run_group(
        [
            MatchAttrCount,
            MatchAttrRatio,
            IsIndoorOutdoorMatch,
        ],
        [
            (
                ["search_term", "search_term_alt", "search_term_auto_corrected"],
                ["product_attribute_list"],
            ),
        ],
    )
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:36,代码来源:feature_match.py
注：本文中的 utils.logging_utils._get_logger 函数示例由纯净天空整理自 Github/MSDocs 等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。
请发表评论