• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python time_utils._timestamp函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中utils.time_utils._timestamp函数的典型用法代码示例。如果您正苦于以下问题:Python _timestamp函数的具体用法?Python _timestamp怎么用?Python _timestamp使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了_timestamp函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: Article2Template

def Article2Template(lang="en"):
    """Build and pickle the article -> list-of-templates dictionary for ``lang``.

    Reads ``config.ARTICLE_TEMPLATES[lang]`` line by line.  Only lines that
    start with ``<`` are used; the first and third whitespace-separated
    tokens have their first and last characters removed, and then the first
    ``len(config.LANG_PREFIX[lang])`` characters are cut off to obtain the
    article and template names.  Templates containing ``/`` are skipped.
    The resulting dict is saved to ``config.ARTICLE2TEMPLATE[lang]`` through
    ``pkl_utils._save``.

    Args:
        lang: key into the per-language ``config`` tables (default ``"en"``).
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("[%s]: generate article2template dict for language %s" % (time_utils._timestamp(), lang))
    len_prefix = len(config.LANG_PREFIX[lang])
    articleDict = {}
    # The context manager closes the input file; the original never did.
    with open(config.ARTICLE_TEMPLATES[lang]) as infile:
        # Iterate lazily instead of materialising every line with readlines().
        for line in infile:
            if not line.startswith("<"):
                continue
            row = line.split()
            # Drop the surrounding characters of each token, then the prefix.
            article = row[0][1:-1][len_prefix:]
            template = row[2][1:-1][len_prefix:]

            if "/" in template:
                continue

            articleDict.setdefault(article, []).append(template)
    print("%d articles in total" % len(articleDict))
    pkl_utils._save(config.ARTICLE2TEMPLATE[lang], articleDict)
    print("[%s]: generation complete" % time_utils._timestamp())
开发者ID:dbpedia,项目名称:mappings-autogeneration,代码行数:25,代码来源:parse.py


示例2: process

def process(lang, pivot):
    """Write the entity matrix linking templates of ``lang`` to ``pivot``.

    Loads the ``lang`` -> ``pivot`` link dict, the template -> articles dict
    of ``lang`` and the article -> templates dict of ``pivot``, plus the CSV
    at ``config.EXISTING_MAPPING_OUTPUT[pivot]`` indexed by ``template``.
    For every (template, article) pair whose article has a link, one row is
    recorded with the linked article and, among the linked article's
    templates that appear in the mapping index, the last one together with
    its ``ontology`` value (empty strings when none matches).  The rows are
    written to ``config.ENTITY_MATRIX["<lang>2<pivot>"]`` without an index.

    Args:
        lang: source language key.
        pivot: pivot language key.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("[%s]: process for language %s" % (time_utils._timestamp(), lang))
    linkDict = pkl_utils._load(config.ILL_DICT["%s2%s" % (lang, pivot)])
    templateDict = pkl_utils._load(config.TEMPLATE2ARTICLE[lang])
    articleDict = pkl_utils._load(config.ARTICLE2TEMPLATE[pivot])
    mapping = pd.read_csv(config.EXISTING_MAPPING_OUTPUT[pivot], index_col="template")

    template1 = []
    template2 = []
    article1 = []
    article2 = []
    ontology = []
    for template in templateDict:
        for article in templateDict[template]:
            if article not in linkDict:
                continue
            linked = linkDict[article]
            template1.append(template)
            article1.append(article)
            article2.append(linked)

            # Keep the last pivot template that has an existing mapping.
            matched_class = ""
            matched_template = ""
            for candidate in articleDict.get(linked, []):
                if candidate in mapping.index:
                    matched_class = mapping.at[candidate, "ontology"]
                    matched_template = candidate
            template2.append(matched_template)
            ontology.append(matched_class)

    data = {
        "template1": template1,
        "article1": article1,
        "template2": template2,
        "article2": article2,
        "ontology": ontology,
    }
    df = pd.DataFrame(data)
    df.to_csv(config.ENTITY_MATRIX["%s2%s" % (lang, pivot)], index=False)
    print("[%s]: processing complete" % time_utils._timestamp())
开发者ID:dbpedia,项目名称:mappings-autogeneration,代码行数:34,代码来源:parse.py


示例3: __init__

    def __init__(self, model_folder, model_list, subm_prefix,
                 weight_opt_max_evals=10, w_min=-1., w_max=1.,
                 inst_subsample=0.5, inst_subsample_replacement=False,
                 inst_splitter=None,
                 model_subsample=1.0, model_subsample_replacement=True,
                 bagging_size=10, init_top_k=5, epsilon=0.00001,
                 multiprocessing=False, multiprocessing_num_cores=1,
                 enable_extreme=True, random_seed=0):
        """Store the settings on the instance and create a timestamped logger.

        Constraints enforced by assertions:
          * ``inst_subsample`` must lie in ``(0, 1]``;
          * ``model_subsample`` must be positive, and unless it is exactly of
            type ``int`` it must not exceed ``1``.

        All arguments are stored unchanged under attributes of the same
        name; ``n_models`` is set to ``len(model_list)``.
        """
        # Check the subsampling arguments before anything is stored.
        assert 0 < inst_subsample <= 1.
        assert model_subsample > 0
        assert (type(model_subsample) == int) or (model_subsample <= 1.)

        # Model pool and output naming.
        self.model_folder = model_folder
        self.model_list = model_list
        self.subm_prefix = subm_prefix

        # Weight-search settings.
        self.weight_opt_max_evals = weight_opt_max_evals
        self.w_min = w_min
        self.w_max = w_max

        # Instance-level subsampling.
        self.inst_subsample = inst_subsample
        self.inst_subsample_replacement = inst_subsample_replacement
        self.inst_splitter = inst_splitter

        # Model-level subsampling.
        self.model_subsample = model_subsample
        self.model_subsample_replacement = model_subsample_replacement

        # Remaining settings, stored as given.
        self.bagging_size = bagging_size
        self.init_top_k = init_top_k
        self.epsilon = epsilon
        self.multiprocessing = multiprocessing
        self.multiprocessing_num_cores = multiprocessing_num_cores
        self.enable_extreme = enable_extreme
        self.random_seed = random_seed

        log_file = "ensemble_selection_%s.log" % time_utils._timestamp()
        self.logger = logging_utils._get_logger(config.LOG_DIR, log_file)
        self.n_models = len(self.model_list)
开发者ID:Anhmike,项目名称:Kaggle_HomeDepot,代码行数:33,代码来源:extreme_ensemble_selection.py


示例4: main

def main():
    """Run ``GroupRelevance`` once per level-1 split and once on all training rows.

    The full data set is loaded from ``config.ALL_DATA_LEMMATIZED_STEMMED``
    and its first ``TRAIN_SIZE`` rows are taken as the training frame.  For
    split ``i`` the features go to ``<FEAT_DIR>/Run<i+1>`` and use only the
    rows selected by that split's first index set; the final pass writes to
    ``<FEAT_DIR>/All`` and uses the whole training frame.
    """
    log_file = "generate_feature_group_relevance_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfTrain = dfAll.iloc[:TRAIN_SIZE].copy()

    def _generate(train_frame, out_dir):
        # The slice leaves only "product_title" as the observed field.
        obs_fields = ["search_term", "product_title"][1:]
        aggregation_mode = ["mean", "std", "max", "min", "median", "size"]
        param_list = [dfAll["id"], train_frame, aggregation_mode]
        wrapper = StandaloneFeatureWrapper(
            GroupRelevance, dfAll, obs_fields, param_list, out_dir, logger)
        wrapper.go()

    ## run python3 splitter.py first
    folds = pkl_utils._load("%s/splits_level1.pkl" % config.SPLIT_DIR)
    n_folds = len(folds)

    ## for cv
    for run in range(n_folds):
        train_idx, _valid_idx = folds[run][0], folds[run][1]
        fold_train = dfTrain.iloc[train_idx].copy()
        _generate(fold_train, "%s/Run%d" % (config.FEAT_DIR, run + 1))

    ## for all
    _generate(dfTrain, "%s/All" % (config.FEAT_DIR))
开发者ID:Anhmike,项目名称:Kaggle_HomeDepot,代码行数:29,代码来源:feature_group_relevance.py


示例5: run_tsne_lsa_ngram

def run_tsne_lsa_ngram():
    """Run the TSNE/LSA n-gram generators, standalone and then pairwise.

    Loads ``config.ALL_DATA_LEMMATIZED_STEMMED``, drops the
    ``product_attribute_list`` column in place, and then:

    1. runs ``TSNE_LSA_Word_Ngram`` for n-grams 1-3 and
       ``TSNE_LSA_Char_Ngram`` for n-gram 4 through
       ``StandaloneFeatureWrapper``;
    2. runs ``TSNE_LSA_Word_Ngram_Pair`` for n-grams 1 and 2 through
       ``PairwiseFeatureWrapper``.

    Every wrapper is created with ``force_corr=True`` and writes to
    ``config.FEAT_DIR``.
    """
    log_file = "generate_feature_tsne_lsa_ngram_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfAll.drop(["product_attribute_list"], inplace=True, axis=1)

    # Standalone features.  An earlier setting, [[1,2,3], [2,3,4,5]], was
    # overwritten by the values below before it was ever used.
    standalone_plan = [
        (TSNE_LSA_Word_Ngram, [1, 2, 3]),
        (TSNE_LSA_Char_Ngram, [4]),
    ]
    obs_fields = [
        "search_term",
        "search_term_alt",
        "search_term_auto_corrected",
        "product_title",
        "product_description",
    ]
    for generator, ngrams in standalone_plan:
        for ngram in ngrams:
            param_list = [ngram, config.SVD_DIM, config.SVD_N_ITER]
            StandaloneFeatureWrapper(
                generator, dfAll, obs_fields, param_list,
                config.FEAT_DIR, logger, force_corr=True).go()

    # Pairwise features: a single (observed, target) field pairing.
    obs_fields = ["search_term", "search_term_alt", "search_term_auto_corrected"]
    target_fields = ["product_title", "product_description"]
    for ngram in [1, 2]:
        for generator in [TSNE_LSA_Word_Ngram_Pair]:
            param_list = [ngram, config.SVD_DIM, config.SVD_N_ITER]
            PairwiseFeatureWrapper(
                generator, dfAll, obs_fields, target_fields, param_list,
                config.FEAT_DIR, logger, force_corr=True).go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:28,代码来源:feature_vector_space.py


示例6: run_count

def run_count():
    """Run the first/last n-gram intersect count and ratio generators.

    Each of the four generators is run through ``PairwiseFeatureWrapper``
    for n-grams 1, 2 and 3, in both directions: query fields observed
    against document fields, and document fields observed against query
    fields.  Output goes to ``config.FEAT_DIR``.
    """
    log_file = "generate_feature_first_last_ngram_count_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    generators = [
        FirstIntersectCount_Ngram,
        LastIntersectCount_Ngram,
        FirstIntersectRatio_Ngram,
        LastIntersectRatio_Ngram,
    ]

    query_fields = (
        "search_term",
        "search_term_product_name",
        "search_term_alt",
        "search_term_auto_corrected",
    )
    document_fields = (
        "product_title",
        "product_title_product_name",
        "product_description",
        "product_attribute",
        "product_brand",
        "product_color",
    )
    # Each direction gets its own fresh lists, as in a hand-written pairing.
    directions = [
        (list(query_fields), list(document_fields)),   # query in document
        (list(document_fields), list(query_fields)),   # document in query
    ]

    # Equivalent to [1,2,3,12,123][:3].
    ngrams = [1, 2, 3]
    for obs_fields, target_fields in directions:
        for generator in generators:
            for ngram in ngrams:
                PairwiseFeatureWrapper(
                    generator, dfAll, obs_fields, target_fields, [ngram],
                    config.FEAT_DIR, logger).go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:27,代码来源:feature_first_last_ngram.py


示例7: main

def main(options):
    """Create a timestamped logger and run a ``TaskOptimizer`` for ``options``.

    Args:
        options: object exposing ``task_mode``, ``learner_name``,
            ``feature_name``, ``max_evals`` and ``refit_once``.
    """
    # The scraped snippet showed "[[email protected]%s]" twice here: an e-mail
    # obfuscation filter rewrote the "name@" part of each tag.
    # NOTE(review): "Feat@" / "Learner@" restored from the upstream project's
    # naming -- confirm against the original task.py.
    logname = "[Feat@%s]_[Learner@%s]_hyperopt_%s.log" % (
        options.feature_name, options.learner_name, time_utils._timestamp())
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    optimizer = TaskOptimizer(
        options.task_mode, options.learner_name, options.feature_name,
        logger, options.max_evals, verbose=True, refit_once=options.refit_once)
    optimizer.run()
开发者ID:yitang,项目名称:Kaggle_HomeDepot,代码行数:7,代码来源:task.py


示例8: main

def main():
    """Run the intersect-position generators in both query/document directions.

    ``IntersectPosition_Ngram`` and ``IntersectNormPosition_Ngram`` are run
    through ``PairwiseFeatureWrapper`` for n-grams 1, 2 and 3 with the
    aggregations mean/std/max/min/median.  On the document side only
    ``product_title_product_name`` is used.
    """
    log_file = "generate_feature_intersect_position_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    generators = [
        IntersectPosition_Ngram,
        IntersectNormPosition_Ngram,
    ]

    query_fields = (
        "search_term",
        "search_term_product_name",
        "search_term_alt",
        "search_term_auto_corrected",
    )
    # The [1:2] slice of the six document fields leaves exactly this one.
    document_fields = ("product_title_product_name",)
    directions = [
        (list(query_fields), list(document_fields)),   # query in document
        (list(document_fields), list(query_fields)),   # document in query
    ]

    # Equivalent to [1,2,3,12,123][:3].
    ngrams = [1, 2, 3]
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    for obs_fields, target_fields in directions:
        for generator in generators:
            for ngram in ngrams:
                param_list = [ngram, aggregation_mode]
                PairwiseFeatureWrapper(
                    generator, dfAll, obs_fields, target_fields, param_list,
                    config.FEAT_DIR, logger).go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:25,代码来源:feature_intersect_position.py


示例9: parse_args

def parse_args(parser):
    """Register the ``--dim`` and ``--outfile`` options and parse the arguments.

    ``--outfile`` defaults to ``feature_conf_<timestamp>.py``.

    Returns:
        The ``(options, args)`` pair produced by ``parser.parse_args()``.
    """
    parser.add_option(
        "-d", "--dim",
        dest="lsa_columns", type=int, default=1,
        help="lsa_columns")

    default_outfile = "feature_conf_%s.py" % time_utils._timestamp()
    parser.add_option(
        "-o", "--outfile",
        dest="outfile", type="string", default=default_outfile,
        help="outfile")

    options, args = parser.parse_args()
    return options, args
开发者ID:Anhmike,项目名称:Kaggle_HomeDepot,代码行数:8,代码来源:get_feature_conf_linear_stacking.py


示例10: parse_args

def parse_args(parser):
    """Register ``--level``, ``--top`` and ``--outfile`` and parse the arguments.

    ``--outfile`` defaults to ``stacking_feature_conf_<timestamp>.py``.

    Returns:
        The ``(options, args)`` pair produced by ``parser.parse_args()``.
    """
    parser.add_option(
        "-l", "--level",
        dest="level", type="int", default=2, help="level")
    parser.add_option(
        "-t", "--top",
        dest="topN", type="int", default=10, help="top-N")

    default_outfile = "stacking_feature_conf_%s.py" % time_utils._timestamp()
    parser.add_option(
        "-o", "--outfile",
        dest="outfile", type="string", default=default_outfile,
        help="outfile")

    options, args = parser.parse_args()
    return options, args
开发者ID:MrSnark,项目名称:Kaggle_HomeDepot,代码行数:10,代码来源:get_stacking_feature_conf.py


示例11: main

def main(which):
    """Run the selected co-occurrence statistic generators.

    Args:
        which: comma-separated selection among ``tf``, ``norm_tf``,
            ``tfidf``, ``norm_tfidf`` and ``bm25``.  Entries that match none
            of these are ignored; order and repeats are preserved.

    Two passes are made through ``PairwiseFeatureWrapper``:

    1. query fields against document fields and the reverse, for n-grams
       1-3 with the aggregations mean/std/max/min/median;
    2. ``search_term_product_name`` against ``product_title_product_name``
       for n-grams 1 and 2, where n-gram 2 is given ``"mean"`` in place of
       the aggregation list.
    """
    log_file = "generate_feature_stat_cooc_tfidf_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    # Selection name -> generator class.
    by_name = {
        "tf": StatCoocTF_Ngram,
        "norm_tf": StatCoocNormTF_Ngram,
        "tfidf": StatCoocTFIDF_Ngram,
        "norm_tfidf": StatCoocNormTFIDF_Ngram,
        "bm25": StatCoocBM25_Ngram,
    }
    generators = [by_name[w] for w in which.split(",") if w in by_name]

    def _run(generator, obs_fields, target_fields, param_list):
        wrapper = PairwiseFeatureWrapper(
            generator, dfAll, obs_fields, target_fields, param_list,
            config.FEAT_DIR, logger)
        wrapper.go()

    # Pass 1: full field sets, both directions.
    query_fields = ("search_term", "search_term_alt", "search_term_auto_corrected")
    document_fields = (
        "product_title",
        "product_title_product_name",
        "product_description",
        "product_attribute",
        "product_brand",
        "product_color",
    )
    directions = [
        (list(query_fields), list(document_fields)),   # query in document
        (list(document_fields), list(query_fields)),   # document in query
    ]
    # Equivalent to [1,2,3,12,123][:3].
    ngrams = [1, 2, 3]
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    for obs_fields, target_fields in directions:
        for generator in generators:
            for ngram in ngrams:
                _run(generator, obs_fields, target_fields, [ngram, aggregation_mode])

    # Pass 2: product names only, query in document.
    obs_fields = ["search_term_product_name"]
    target_fields = ["product_title_product_name"]
    ngrams = [1, 2]
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    for generator in generators:
        for ngram in ngrams:
            # since product_name is of length 2, it makes no difference
            # for various aggregation as there is only one item
            param_list = [ngram, "mean"] if ngram == 2 else [ngram, aggregation_mode]
            _run(generator, obs_fields, target_fields, param_list)
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:55,代码来源:feature_stat_cooc_tfidf.py


示例12: __init__

 def __init__(self, feature_dict, feature_name, feature_suffix=".pkl", corr_threshold=0):
     """Store the combiner settings and create a timestamped logger.

     The four arguments are kept under attributes of the same name.
     ``feature_names_basic`` and ``feature_names_cv`` start as empty lists
     and ``basic_only`` starts at ``0``.  ``splitter`` and ``n_iter`` are
     taken from the module-level names ``splitter_level1`` and ``n_iter``.
     """
     # Caller-supplied settings.
     self.feature_dict = feature_dict
     self.feature_name = feature_name
     self.feature_suffix = feature_suffix
     self.corr_threshold = corr_threshold

     # Initial bookkeeping state.
     self.feature_names_basic = []
     self.feature_names_cv = []
     self.basic_only = 0

     log_file = "feature_combiner_%s_%s.log" % (feature_name, time_utils._timestamp())
     self.logger = logging_utils._get_logger(config.LOG_DIR, log_file)

     # Taken from module-level names.
     self.splitter = splitter_level1
     self.n_iter = n_iter
开发者ID:MrSnark,项目名称:Kaggle_HomeDepot,代码行数:12,代码来源:feature_combiner.py


示例13: getExistingMapping

def getExistingMapping(lang="en"):
    """Extract (mapping, ontology class) pairs for ``lang`` and write them as CSV.

    Parses ``config.EXISTING_MAPPING[lang]`` as N3, selects every
    ``?template`` / ``?class`` pair connected through ``rr:subjectMap`` and
    ``rr:class``, and writes a table with the columns ``mapping``,
    ``ontology`` and ``template`` to ``config.EXISTING_MAPPING_OUTPUT[lang]``
    (no index column).  ``template`` is ``config.TEMPLATE_NAME[lang]``
    followed by the mapping value with its first 47 characters removed.

    Args:
        lang: key into the per-language ``config`` tables (default ``"en"``).
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("[%s]: parse existing mapping for language %s" % (time_utils._timestamp(), lang))
    G = g.Graph()
    G.parse(config.EXISTING_MAPPING[lang], format="n3")

    q = '''
PREFIX rr: <http://www.w3.org/ns/r2rml#>

SELECT ?template ?class
WHERE {
    ?template rr:subjectMap ?mapping .
    ?mapping rr:class ?class .
}
'''
    # Read the result rows once so both columns come from the same pass.
    rows = [(row[0], row[1]) for row in G.query(q)]
    mapping = [pair[0] for pair in rows]
    ontology = [pair[1] for pair in rows]
    df = pd.DataFrame({'mapping': mapping, 'ontology': ontology})

    # NOTE(review): 47 is presumably the length of a fixed URI prefix on the
    # mapping identifiers -- confirm against the mapping file.
    df["template"] = df["mapping"].apply(lambda x: config.TEMPLATE_NAME[lang] + x[47:])
    df.to_csv(config.EXISTING_MAPPING_OUTPUT[lang], index=False)
    print("[%s]: parsing complete" % time_utils._timestamp())
开发者ID:dbpedia,项目名称:mappings-autogeneration,代码行数:22,代码来源:parse.py


示例14: run_compression_distance

def run_compression_distance():
    """Run ``CompressionDistance`` for the query fields against the document fields.

    A single ``PairwiseFeatureWrapper`` is built with an empty parameter
    list and output directory ``config.FEAT_DIR``, and its ``go()`` is called.
    """
    log_file = "generate_feature_compression_distance_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    # Only one (observed, target) pairing is configured.
    field_pairs = [
        (
            ["search_term", "search_term_product_name", "search_term_alt", "search_term_auto_corrected"],
            ["product_title", "product_title_product_name", "product_description", "product_attribute", "product_brand", "product_color"],
        ),
    ]
    for obs_fields, target_fields in field_pairs:
        PairwiseFeatureWrapper(
            CompressionDistance, dfAll, obs_fields, target_fields, [],
            config.FEAT_DIR, logger).go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:13,代码来源:feature_distance.py


示例15: main

def main():
    """Run the basic standalone feature generators in four groups.

    Each generator is wrapped in ``StandaloneFeatureWrapper`` with output
    directory ``config.FEAT_DIR`` and run via ``go()``:

    1. document statistics on the six text fields;
    2. generators for ``product_uid``;
    3. unique n-gram count/ratio for n-grams 1-3 on the six text fields;
    4. attribute generators for ``product_attribute_list``.
    """
    log_file = "generate_feature_basic_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)

    def _run(generator, obs_fields, param_list):
        wrapper = StandaloneFeatureWrapper(
            generator, dfAll, obs_fields, param_list, config.FEAT_DIR, logger)
        wrapper.go()

    ## basic
    obs_fields = [
        "search_term", "product_title", "product_description",
        "product_attribute", "product_brand", "product_color",
    ]
    for generator in [DocId, DocLen, DocFreq, DocEntropy, DigitCount, DigitRatio]:
        _run(generator, obs_fields, [])

    ## for product_uid
    obs_fields = ["product_uid"]
    for generator in [DocIdEcho, DocFreq, ProductUidDummy1, ProductUidDummy2, ProductUidDummy3]:
        _run(generator, obs_fields, [])

    ## unique count
    obs_fields = [
        "search_term", "product_title", "product_description",
        "product_attribute", "product_brand", "product_color",
    ]
    for generator in [UniqueCount_Ngram, UniqueRatio_Ngram]:
        for ngram in [1, 2, 3]:
            _run(generator, obs_fields, [ngram])

    ## for product_attribute_list
    attribute_generators = [
        AttrCount,
        AttrBulletCount,
        AttrBulletRatio,
        AttrNonBulletCount,
        AttrNonBulletRatio,
        AttrHasProductHeight,
        AttrHasProductWidth,
        AttrHasProductLength,
        AttrHasProductDepth,
        AttrHasIndoorOutdoor,
    ]
    obs_fields = ["product_attribute_list"]
    for generator in attribute_generators:
        _run(generator, obs_fields, [])
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:51,代码来源:feature_basic.py


示例16: getILL

def getILL(lang, target):
    """Build and pickle the link dictionary from ``lang`` to ``target``.

    Reads ``config.ILL[lang]`` line by line.  Only lines that start with
    ``<`` are used; the first and third whitespace-separated tokens have
    their first and last characters removed.  A pair is kept only when the
    first value starts with ``config.LANG_PREFIX[lang]`` and the second
    with ``config.LANG_PREFIX[target]``; both prefixes are then cut off and
    the pair is stored as ``linkDict[first] = second`` (later lines win).
    The dict is saved to ``config.ILL_DICT["<lang>2<target>"]`` through
    ``pkl_utils._save``.

    Args:
        lang: source language key.
        target: target language key.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("[%s]: generate ILL dict from language %s to language %s" % (time_utils._timestamp(), lang, target))
    prefix1 = config.LANG_PREFIX[lang]
    prefix2 = config.LANG_PREFIX[target]
    len1 = len(prefix1)
    len2 = len(prefix2)
    linkDict = {}
    # The context manager closes the input file; the original never did.
    with open(config.ILL[lang]) as infile:
        # Iterate lazily instead of materialising every line with readlines().
        for line in infile:
            if not line.startswith("<"):
                continue
            row = line.split()
            lang1 = row[0][1:-1]
            lang2 = row[2][1:-1]
            # The prefix is removed by slicing its length off the front, so
            # it has to be at the front; a substring test (the original
            # ``in``) could let through values that get cut incorrectly.
            if not lang1.startswith(prefix1):
                continue
            if not lang2.startswith(prefix2):
                continue
            linkDict[lang1[len1:]] = lang2[len2:]
    print("%d links in total" % len(linkDict))
    pkl_utils._save(config.ILL_DICT["%s2%s" % (lang, target)], linkDict)
    print("[%s]: generation complete" % time_utils._timestamp())
开发者ID:dbpedia,项目名称:mappings-autogeneration,代码行数:24,代码来源:parse.py


示例17: main

def main():
    """Build and pickle the parent -> children dictionary of the ontology.

    Parses ``config.ONTOLOGY`` as N3, selects every ``?child`` / ``?parent``
    pair connected through ``subClassOf`` in the
    ``http://www.w3.org/2000/01/rdf-schema#`` namespace, groups the children
    (as strings) by parent (as string), and saves the dict to
    ``config.ONTOLOGY_TREE`` through ``pkl_utils._save``.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("[%s]: generate ontology hierarchy tree" % (time_utils._timestamp()))
    G = g.Graph()
    G.parse(config.ONTOLOGY, format="n3")

    # The prefix is named ``rr`` here but is bound to the RDF Schema namespace.
    q = '''
PREFIX rr: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?child ?parent
WHERE {
    ?child rr:subClassOf ?parent .
}'''

    results = G.query(q)
    ontologyDict = {}
    for row in results:
        child = str(row[0])
        parent = str(row[1])
        ontologyDict.setdefault(parent, []).append(child)
    pkl_utils._save(config.ONTOLOGY_TREE, ontologyDict)
    print("[%s]: generation complete" % time_utils._timestamp())
开发者ID:dbpedia,项目名称:mappings-autogeneration,代码行数:24,代码来源:ontology.py


示例18: parse_args

def parse_args(parser):
    """Register the feature-combiner options and parse the arguments.

    Options: ``--level``, ``--config``, ``--name`` (defaults to
    ``basic<timestamp>``), ``--suffix``, ``--meta_config`` and
    ``--threshold``.

    Returns:
        The ``(options, args)`` pair produced by ``parser.parse_args()``.
    """
    parser.add_option(
        "-l", "--level",
        dest="feature_level", type="int", default=1,
        help="feature level, e.g., 1, 2, 3")
    parser.add_option(
        "-c", "--config",
        dest="feature_conf", type="string", default="feature_conf",
        help="feature config name")

    default_name = "basic%s" % time_utils._timestamp()
    parser.add_option(
        "-n", "--name",
        dest="feature_name", type="string", default=default_name,
        help="feature name")
    parser.add_option(
        "-s", "--suffix",
        dest="feature_suffix", type="string", default=".pkl",
        help="feature suffix")
    parser.add_option(
        "-m", "--meta_config",
        dest="feature_conf_meta", type="string", default="feature_conf_meta",
        help="meta feature config name")
    parser.add_option(
        "-t", "--threshold",
        dest="corr_threshold", type="float", default=0.0,
        help="correlation threshold for dropping features")

    options, args = parser.parse_args()
    return options, args
开发者ID:Anhmike,项目名称:Kaggle_HomeDepot,代码行数:15,代码来源:feature_combiner.py


示例19: run_char_dist_sim

def run_char_dist_sim():
    """Run the three character-distribution generators, query vs. document fields.

    Loads ``config.ALL_DATA_LEMMATIZED_STEMMED``, drops the
    ``product_attribute_list`` column in place, and runs each generator
    through ``PairwiseFeatureWrapper`` with an empty parameter list and
    output directory ``config.FEAT_DIR``.
    """
    log_file = "generate_feature_char_dist_sim_%s.log" % time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, log_file)
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED_STEMMED)
    dfAll.drop(["product_attribute_list"], inplace=True, axis=1)

    generators = [CharDistribution_Ratio, CharDistribution_CosineSim, CharDistribution_KL]
    # Only one (observed, target) pairing is configured.
    field_pairs = [
        (
            ["search_term", "search_term_alt", "search_term_auto_corrected"],
            ["product_title", "product_description", "product_attribute"],
        ),
    ]
    for obs_fields, target_fields in field_pairs:
        for generator in generators:
            PairwiseFeatureWrapper(
                generator, dfAll, obs_fields, target_fields, [],
                config.FEAT_DIR, logger).go()
开发者ID:amsqr,项目名称:Kaggle_HomeDepot,代码行数:16,代码来源:feature_vector_space.py


示例20: main

def main():
    """Run the pairwise doc2vec generators for every configured model.

    The data is loaded from ``config.ALL_DATA_LEMMATIZED`` (before
    stemming).  For each model path the loader is picked from the path:
    ``.bin`` -> word2vec binary format, ``.txt`` -> word2vec text format,
    anything else -> a native ``Doc2Vec`` model plus its pickled
    ``.sent_label`` companion.  A model that fails to load is skipped.
    Each loaded model is passed to ``Doc2Vec_CosineSim``, ``Doc2Vec_RMSE``
    and ``Doc2Vec_Vdiff`` through ``PairwiseFeatureWrapper``.
    """
    logname = "generate_feature_doc2vec_%s.log"%time_utils._timestamp()
    logger = logging_utils._get_logger(config.LOG_DIR, logname)
    #### NOTE: use data BEFORE STEMMING
    dfAll = pkl_utils._load(config.ALL_DATA_LEMMATIZED)

    doc2vec_model_dirs = []
    model_prefixes = []
    ## doc2vec model trained with Homedepot dataset: brand/color/obs/title/description
    doc2vec_model_dirs.append( config.DOC2VEC_MODEL_DIR + "/Homedepot-doc2vec-D%d-min_count%d.model"%(config.EMBEDDING_DIM, config.EMBEDDING_MIN_COUNT) )
    model_prefixes.append( "Homedepot" )
    for doc2vec_model_dir, model_prefix in zip(doc2vec_model_dirs, model_prefixes):
        # Reset per model: only the native branch loads a label map, and a
        # value left over from a previous iteration must never be reused.
        # NOTE(review): the word2vec-format branches now pass None where the
        # original raised NameError or reused a stale value -- confirm the
        # generators accept that.
        doc2vec_model_sent_label = None
        ## load model
        try:
            # The three loaders are mutually exclusive.  The original used
            # ``if``/``if``/``else``, so a ".bin" model was loaded and then
            # immediately replaced by the native loader in the ``else`` branch.
            if ".bin" in doc2vec_model_dir:
                doc2vec_model = gensim.models.Doc2Vec.load_word2vec_format(doc2vec_model_dir, binary=True)
            elif ".txt" in doc2vec_model_dir:
                doc2vec_model = gensim.models.Doc2Vec.load_word2vec_format(doc2vec_model_dir, binary=False)
            else:
                doc2vec_model = gensim.models.Doc2Vec.load(doc2vec_model_dir)
                doc2vec_model_sent_label = pkl_utils._load(doc2vec_model_dir+".sent_label")
        except Exception:
            # Best effort: skip models that cannot be loaded, but let
            # KeyboardInterrupt/SystemExit through (a bare ``except:`` did not).
            continue

        # ## standalone (not used in model building)
        # obs_fields = ["search_term", "search_term_alt", "product_title", "product_description", "product_attribute"]
        # generator = Doc2Vec_Vector
        # param_list = [doc2vec_model, doc2vec_model_sent_label, model_prefix]
        # sf = StandaloneFeatureWrapper(generator, dfAll, obs_fields, param_list, config.FEAT_DIR, logger)
        # sf.go()

        ## pairwise
        generators = [
            Doc2Vec_CosineSim,
            Doc2Vec_RMSE,
            Doc2Vec_Vdiff,
        ]
        obs_fields_list = []
        target_fields_list = []
        obs_fields_list.append( ["search_term", "search_term_alt"] )
        target_fields_list.append( ["product_title", "product_description", "product_attribute", "product_brand", "product_color"] )
        for obs_fields, target_fields in zip(obs_fields_list, target_fields_list):
            for generator in generators:
                param_list = [doc2vec_model, doc2vec_model_sent_label, model_prefix]
                pf = PairwiseFeatureWrapper(generator, dfAll, obs_fields, target_fields, param_list, config.FEAT_DIR, logger)
                pf.go()
开发者ID:MrSnark,项目名称:Kaggle_HomeDepot,代码行数:46,代码来源:feature_doc2vec.py



注:本文中的utils.time_utils._timestamp函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python timer.Timer类代码示例发布时间:2022-05-26
下一篇:
Python thread_utils.call_threaded函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap