• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python classify.MaxentClassifier类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中nltk.classify.MaxentClassifier的典型用法代码示例。如果您正苦于以下问题:Python MaxentClassifier类的具体用法?Python MaxentClassifier怎么用?Python MaxentClassifier使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了MaxentClassifier类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: main_function

def main_function():
    """Train a megam MaxentClassifier on the training tweets and report test accuracy.

    Fetches training and test tweets through the ``classify`` helpers, turns
    them into feature sets with ``process_tweets``, trains with
    ``algorithm="megam"``, prints the 10 most informative features and then
    prints the accuracy measured on the test feature set.
    """
    # NOTE(review): this connection is opened but not referenced again in this
    # block; every query below uses ``conn_analysis``, which is not defined
    # here -- confirm it is a module-level connection.
    conn = MySQLdb.connect(host=DATABASES['date_cutoff']['HOST'],
                           user=DATABASES['date_cutoff']['USER'],
                           passwd=DATABASES['date_cutoff']['PASSWORD'],
                           db=DATABASES['date_cutoff']['NAME'])

    training_tweets = classify.get_training_tweets(conn_analysis)
    training_feature_set = process_tweets(training_tweets)

    # config_megam is called before any training that uses algorithm="megam".
    config_megam('/opt/packages')
    classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)

    # The per-label error/count/guess tables and the confusion matrix that
    # used to be initialised here were never read, so they have been removed.

    test_tweets = classify.get_test_tweets(conn_analysis)
    test_feature_set = process_tweets(test_tweets)

    classifier.show_most_informative_features(10)
    classifier_accuracy = accuracy(classifier, test_feature_set)
    print("classifier accuracy: " + repr(classifier_accuracy))
开发者ID:7andrew7,项目名称:vaccine-sentiment,代码行数:28,代码来源:max-ent-bigrams.py


示例2: __maxent_train

 def __maxent_train(fs):
     """Train a MaxentClassifier on ``fs`` with options taken from the surrounding scope.

     ``algorithm``, ``gaussian_prior_sigma``, ``count_cutoff``, ``min_lldelta``
     and ``trace`` are not parameters of this function; they are resolved from
     the scope this function is defined in.
     """
     options = dict(
         algorithm=algorithm,
         gaussian_prior_sigma=gaussian_prior_sigma,
         count_cutoff=count_cutoff,
         min_lldelta=min_lldelta,
         trace=trace,
     )
     return MaxentClassifier.train(fs, **options)
开发者ID:Sandy4321,项目名称:nltk_contrib,代码行数:7,代码来源:chunk.py


示例3: main_function

def main_function():
    """Classify every pending tweet with a megam MaxentClassifier and store the guesses.

    Trains on the tweets returned by ``classify.get_training_tweets``, then
    for each tweet from ``classify.get_tweets_to_classify`` looks up its text,
    classifies it, records the guess via ``update_tweet_polarity`` and tallies
    it. Finally calls ``fix_manual_tweets`` and prints the per-label tally.
    """
    settings = DATABASES['default']
    # NOTE(review): ``conn`` is not used again below; all helpers receive
    # ``conn_analysis``, which is defined outside this block.
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])

    labelled_tweets = classify.get_training_tweets(conn_analysis)
    features = classify.process_tweets(labelled_tweets)

    config_megam('/opt/packages')
    model = MaxentClassifier.train(features, algorithm="megam", trace=0)

    count_table = {'+':0, '-':0, 'I':0, 'O':0}
    pending = classify.get_tweets_to_classify(conn_analysis)

    for row in pending:
        tweet_id = row[0]
        text = classify.get_tweet_text(conn_analysis, tweet_id)[0][0]
        guess = model.classify(classify.process_tweet(text))
        update_tweet_polarity(tweet_id, guess, conn_analysis)
        count_table[guess] += 1

    # For the tweets where polarity was determined manually, copy from
    # majority_vote to auto_vote (per the original author's note).
    fix_manual_tweets(conn_analysis)

    print(count_table)
开发者ID:7andrew7,项目名称:vaccine-sentiment,代码行数:26,代码来源:full-dataset-max-ent.py


示例4: train

 def train(self, d):
     """
     Fit the maxent classifier on the labeled set ``d``.

     The set is first passed through ``self.__tag_data_set``; the trained
     classifier is stored on ``self.classifier`` and the number of records
     in ``d`` is logged at INFO level.
     """
     tagged = self.__tag_data_set(d)
     self.classifier = MaxentClassifier.train(tagged)
     record_count = len(d)
     logging.info("Training on %s records complete." % record_count)
开发者ID:agness,项目名称:recipe_nltk,代码行数:7,代码来源:nltk_classifier.py


示例5: _train

 def _train(self, algo='iis', trace=0, max_iter=10):
     """
     Internal method to train and return an NLTK maxent classifier.

     Reads ``(text, quote)`` pairs from ``train_query``, converts each text
     with ``get_features`` and trains on the resulting labeled feature sets.
     ``algo``, ``trace`` and ``max_iter`` are forwarded to
     ``MaxentClassifier.train``.
     """
     labelled_texts = [(row.text, row.quote) for row in train_query]
     train_set = []
     for text, label in labelled_texts:
         train_set.append((get_features(text), label))
     return MaxentClassifier.train(
         train_set, algorithm=algo, trace=trace, max_iter=max_iter)
开发者ID:bokas,项目名称:citizen-quotes,代码行数:7,代码来源:maxent.py


示例6: classify_maxent

def classify_maxent(X_train, Y_train, X_test):
    """Train a MaxentClassifier on paired inputs/labels and label ``X_test``.

    ``X_train[i]`` is paired with ``Y_train[i]`` for every index of
    ``X_train``; the predictions for ``X_test`` come from ``classify_many``.
    """
    labelled = [(X_train[idx], Y_train[idx]) for idx in range(len(X_train))]
    model = MaxentClassifier.train(labelled)
    return model.classify_many(X_test)
开发者ID:JoshuaW1990,项目名称:SentimentAnalysis,代码行数:9,代码来源:SentimentAnalysis.py


示例7: maxent_train

 def maxent_train(self):
     """Train the maxent classifier on ``self.maxent_memes_all`` and dump its weights.

     The trained classifier is stored on ``self.classifier_all``. Each weight
     is then written to ``lambdas.txt`` in the current working directory, one
     ``weight = <value>`` line per weight.
     """
     self.classifier_all = MaxentClassifier.train(self.maxent_memes_all, trace=100, max_iter=5)
     weights = self.classifier_all.weights()
     # Use a context manager so the file is flushed and closed even if a
     # write fails; the original left the handle open.
     with open("lambdas.txt", "w") as f:
         for weight in weights:
             f.write("weight = %f\n" % weight)
开发者ID:AlexeyMK,项目名称:DATASS,代码行数:10,代码来源:NgramsManager.py


示例8: main_function

def main_function():
    """Label the test tweets with Naive Bayes and MaxEnt, then print a confusion matrix.

    Both classifiers are trained on the same training feature set. Naive
    Bayes guesses are stored via ``classify.update_tweet_polarity`` and MaxEnt
    guesses via ``update_tweet_polarity_ensemble``; a per-label tally is
    printed after each pass. The final pass reads, for every test tweet, the
    results of the ``CHECK_CONSENSUS`` and ``CHECK_MAJORITY`` statements and
    counts the pair when neither is ``None``.
    """
    settings = DATABASES['default']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])

    training_tweets = classify.get_training_tweets(conn)
    training_feature_set = classify.process_tweets(training_tweets)

    bayes_classifier = NaiveBayesClassifier.train(training_feature_set)

    count_table = {'+':0, '-':0, 'I':0, 'O':0}

    test_tweets = classify.get_test_tweets(conn)

    # Pass 1: Naive Bayes.
    for row in test_tweets:
        tweet_id = row[0]
        text = classify.get_tweet_text(conn, tweet_id)[0][0]
        guess = bayes_classifier.classify(classify.process_tweet(text))
        classify.update_tweet_polarity(tweet_id, guess, conn)
        count_table[guess] += 1

    print("Naive Bayes")
    print(count_table)

    # Pass 2: Maximum Entropy, with a fresh tally.
    count_table = {'+':0, '-':0, 'I':0, 'O':0}
    config_megam('/opt/packages')
    max_ent_classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)

    for row in test_tweets:
        tweet_id = row[0]
        text = classify.get_tweet_text(conn, tweet_id)[0][0]
        guess = max_ent_classifier.classify(classify.process_tweet(text))
        update_tweet_polarity_ensemble(tweet_id, guess, conn)
        count_table[guess] += 1

    print("Maximum Entropy")
    print(count_table)

    # Pass 3: accuracy matrix, indexed as full_matrix[actual][guess].
    full_matrix = {'+':{'+':0, '-':0, 'I':0, 'O':0},
                   '-':{'+':0, '-':0, 'I':0, 'O':0},
                   'I':{'+':0, '-':0, 'I':0, 'O':0},
                   'O':{'+':0, '-':0, 'I':0, 'O':0}}

    for row in test_tweets:
        tweet_id = row[0]
        consensus_rows = classify.run_sql(conn, classify.Statements.CHECK_CONSENSUS % tweet_id)
        guess = consensus_rows[0][0]

        majority_rows = classify.run_sql(conn, classify.Statements.CHECK_MAJORITY % tweet_id)
        actual = majority_rows[0][0]

        if guess is None or actual is None:
            continue
        full_matrix[actual][guess] += 1

    print(full_matrix)
开发者ID:7andrew7,项目名称:vaccine-sentiment,代码行数:55,代码来源:a-ensemble-bayes-max-ent.py


示例9: axentClassifier

def axentClassifier(features_train, features_test):
	print 'train on %d instances, test on %d instances' % (len(features_train), len(features_test))
	classifier = MaxentClassifier.train(features_train,algorithm='gis')
	print 'accuracy:', nltk.classify.util.accuracy(classifier, features_test)
	precisions, recalls = precision_recall(classifier, features_test)
	print "accuracy: ", precisions, "fitness: ", recalls

# def sklearnMultinomialNB(features_train, features_test):
# 	print 'train on %d instances, test on %d instances' % (len(features_train), len(features_test))
# 	classifier = SklearnClassifier(MultinomialNB())
# 	classifier.train
# 	print 'accuracy:', nltk.classify.util.accuracy(classifier, features_test)
开发者ID:andylikescodes,项目名称:SentimentalAnalysis,代码行数:12,代码来源:Classifiers.py


示例10: run

def run(training):
    """
    Train a MaxentClassifier on ``training`` and hand it to ``save_classifier``.

    :param training: labeled feature sets passed straight to ``MaxentClassifier.train``
    :return: whatever ``save_classifier`` returns for the trained classifier
    """
    print("Training ME Classifier...")

    train_options = dict(algorithm='GIS', trace=0, max_iter=10, min_lldelta=0.5)
    model = MaxentClassifier.train(training, **train_options)

    print("ME Classifier trained...")
    return save_classifier(model)
开发者ID:Saher-,项目名称:SATC,代码行数:12,代码来源:Classifier_ME.py


示例11: trainMaxent

def trainMaxent(featuresets):
    """Train a MaxentClassifier on all of ``featuresets`` and return it.

    Uses the algorithm at index 1 of ``MaxentClassifier.ALGORITHMS`` with
    ``max_iter=3`` and prints the 100 most informative features. No held-out
    split is made: the whole input is the training set.
    """
    algorithm_name = MaxentClassifier.ALGORITHMS[1]
    model = MaxentClassifier.train(featuresets, algorithm_name, max_iter=3)
    model.show_most_informative_features(100)
    return model
开发者ID:tkuboi,项目名称:eDetection_v2_1,代码行数:14,代码来源:classifyFace.py


示例12: train

 def train(cls, training_sequence, **kwargs):
     """Train a megam maxent model on ``training_sequence`` and build a ``cls`` from it.

     Recognised keyword arguments:
       feature_detector     -- callable applied to each tagged token (no default)
       gaussian_prior_sigma -- default 10
       count_cutoff         -- default 1
       stopping_condition   -- forwarded as ``min_lldelta``, default 1e-7

     The label of each training item is its last element. Returns
     ``cls(classifier._encoding, classifier.weights())``.
     """
     feature_detector = kwargs.get('feature_detector')
     sigma = kwargs.get('gaussian_prior_sigma', 10)
     cutoff = kwargs.get('count_cutoff', 1)
     min_delta = kwargs.get('stopping_condition', 1e-7)

     def _to_labeled_featureset(tagged_token):
         label = tagged_token[-1]
         return (feature_detector(tagged_token), label)

     labeled = LazyMap(_to_labeled_featureset, training_sequence)
     model = MaxentClassifier.train(
         labeled,
         algorithm='megam',
         gaussian_prior_sigma=sigma,
         count_cutoff=cutoff,
         min_lldelta=min_delta,
     )
     return cls(model._encoding, model.weights())
开发者ID:Sandy4321,项目名称:nltk_contrib,代码行数:16,代码来源:train.py


示例13: trainCorpus

def trainCorpus():
	if os.path.exists(classifier_fname):
		return LoadClassifier()
	else:
		c = getDealsCorpus()
		hiwords = corpus_high_info_words(c)
		featdet = lambda words: bag_of_words_in_set(words, hiwords)
		train_feats, test_feats = corpus_train_test_feats(c, featdet)
		trainf = lambda train_feats: MaxentClassifier.train(train_feats, algorithm='megam', trace=0, max_iter=10)
		labelset = set(c.categories())
		classifiers = train_binary_classifiers(trainf, train_feats, labelset)
		multi_classifier = MultiBinaryClassifier(*classifiers.items())
		multi_p, multi_r, avg_md = multi_metrics(multi_classifier, test_feats)
		print multi_p['activitiesevents'], multi_r['activitiesevents'], avg_md
		SaveClassifier(multi_classifier)
		return multi_classifier
开发者ID:shingjay,项目名称:dealchan,代码行数:16,代码来源:trainer.py


示例14: train

    def train(self, featureset=None):
        """
        Train a megam maximum entropy classifier and time the run.

        If ``featureset`` is falsy (including the default ``None``) the
        model's own ``self.featureset()`` is used; pass one in explicitly,
        e.g. during cross validation.

        Returns a ``(classifier, seconds)`` tuple, where ``seconds`` is the
        wall-clock duration of the training call.
        """
        if not featureset:
            featureset = self.featureset()

        started = time.time()
        classifier = MaxentClassifier.train(
            featureset, algorithm='megam', trace=1, gaussian_prior_sigma=1)
        elapsed = time.time() - started

        return classifier, elapsed
开发者ID:ericvsmith,项目名称:product-classifier,代码行数:19,代码来源:build.py


示例15: parse

def parse():
    """Train a maxent-backed tagger and write its predictions to ``WSJ_23.chunk``.

    ``trainer`` supplies the training sentences and the sentences to label.
    The tagger is trained on the first 10000 training sentences. For every
    sentence to label, ``sents[1]`` is tagged and each word from ``sents[0]``
    is written with the predicted label as ``word<TAB>label``; sentences are
    separated by an empty line.
    """
    trained_sents, tagged_sents = trainer("WSJ_02-21.pos-chunk", "WSJ_23.pos")
    print(len(trained_sents))
    tagger = ClassifierBasedPOSTagger(
        train=trained_sents[:10000],
        classifier_builder=lambda train_feats: MaxentClassifier.train(
            train_feats, trace=0, max_iter=10))

    # A context manager closes the output file even if tagging fails; the
    # original never closed it.
    with open("WSJ_23.chunk", 'w') as f:
        for sents in tagged_sents:
            words, tags = sents[0], sents[1]
            chunks = tagger.tag(tags)
            # Each element of ``chunks`` is indexed with [1] to get the
            # predicted label that is written next to the word.
            for word, chunk in zip(words, chunks):
                f.write("%s\t%s\n" % (word, chunk[1]))
            f.write("\n")
开发者ID:pratheeksh,项目名称:NLP,代码行数:20,代码来源:chunker.py


示例16: main_function

def main_function():
    """Train a megam MaxentClassifier on per-label bigram features and print its accuracy.

    The training feature set is the concatenation of ``process_bigrams``
    results for the labels "+", "-", "I" and "O". Accuracy is measured on
    the feature set built by ``process_tweets`` from ``get_training_tweets``.
    """
    settings = DATABASES["date_cutoff"]
    conn = MySQLdb.connect(
        host=settings["HOST"],
        user=settings["USER"],
        passwd=settings["PASSWORD"],
        db=settings["NAME"],
    )

    # NOTE(review): this result is not used below, and the evaluation set
    # further down comes from get_training_tweets -- confirm the swapped
    # naming is intentional.
    training_tweets = get_test_tweets(conn)

    total_word_count = total_words(conn)
    training_feature_set = process_bigrams(conn, "+", total_word_count, best_words)
    for label in ("-", "I", "O"):
        training_feature_set += process_bigrams(conn, label, total_word_count, best_words)

    print("configuring megam")
    config_megam("/opt/packages")
    print("starting training")
    classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)
    print("starting end training")
    classifier.show_most_informative_features(40)

    test_tweets = get_training_tweets(conn)
    test_feature_set = process_tweets(test_tweets)

    classifier_accuracy = accuracy(classifier, test_feature_set)
    print("classifier accuracy: " + repr(classifier_accuracy))
开发者ID:khandelwal,项目名称:vaccine-sentiment,代码行数:40,代码来源:max-ent-sig-words-bigrams.py


示例17: _train_mode_for_user

    def _train_mode_for_user(self, userid):
        """Train a per-user MaxentClassifier from recent scored news and pickle all classifiers.

        Rows with ``news_score == 0`` become 'pos' examples and rows with
        ``news_score == 2`` become 'neg' examples. If either class has three
        or fewer examples, the user's classifier is set to ``None`` and
        nothing is saved. Otherwise the classifier is stored in
        ``self._user_classifier[userid]`` and the whole mapping is pickled to
        a dated file whose path is kept in ``self.dumpfile``.
        """
        if userid in self._user_classifier:
            print("Already exist!!!")
            self._user_classifier[userid] = None
        # Only positive and negative ratings are needed; neutral ratings are
        # not used as a reference, so an INNER JOIN is sufficient.
        sql = """ SELECT site_news.news_uuid, user_score.news_user_score as news_score FROM site_news 
                        INNER JOIN user_score ON site_news.news_uuid = user_score.newsid 
                  WHERE DATE(site_news.time) < CURRENT_DATE() AND DATE(site_news.time) > DATE_SUB(CURRENT_DATE(),INTERVAL 5 DAY) 
                        AND user_score.news_user_score != 1 AND user_score.userid=%d; """ %(userid)
        rated_items = self.db_conn.query(sql)

        print("建立POS/NEG特征")
        pos_feature = []
        neg_feature = []
        for item in rated_items:
            news_vector = nlp_master.get_old_vect(item['news_uuid'])
            score = item['news_score']
            if score == 0:  # positive rating
                pos_feature.append((self.best_word_features(news_vector, news_vector),'pos'))
            elif score == 2:  # negative rating
                neg_feature.append((self.best_word_features(news_vector, news_vector),'neg'))
        print("POS:%d, NEG:%d" %(len(pos_feature),len(neg_feature)))

        if len(pos_feature) <= 3 or len(neg_feature) <= 3:
            print("特征太少,放弃。。。")
            self._user_classifier[userid] = None
            return

        self._user_classifier[userid] = MaxentClassifier.train(pos_feature + neg_feature, max_iter=50)
        print("MaxEnt Classifier for %d build done!"%(userid))

        # Save the updated result.
        today = datetime.date.today()
        self.dumpfile = "dumpdir/recmaxent_dump.%d_%d" %(today.month, today.day)
        with open(self.dumpfile,'wb', -1) as fp:
            pickle.dump([self._user_classifier], fp, -1)
        return
开发者ID:FashtimeDotCom,项目名称:readmeinfo,代码行数:39,代码来源:RecMaxEnt.py


示例18: __init__

    def __init__(self):
        """Load the cached sentiment classifier, or train and cache one.

        If ``classifier_path`` does not exist, the labeled lines of
        ``nltk_sentiment_data/sentiment_data_twitter.txt`` (format
        ``<label> -> <text>``) are turned into feature sets, the first 10%
        are held out for testing, an IIS MaxentClassifier is trained on the
        rest, its accuracy is printed and the classifier is pickled to
        ``classifier_path``. Otherwise the pickled classifier is loaded.

        The classifier is stored on ``self.classifier``. Any exception is
        logged and re-raised.
        """
        try:
            classifier = None
            if not os.path.exists(classifier_path):
                filename = os.path.dirname(os.path.abspath(__file__)) + "/nltk_sentiment_data/sentiment_data_twitter.txt"
                with open(filename, 'rb') as fp:
                    lines = fp.readlines()

                # Split each line once; lines without a ' -> ' separator are skipped.
                feats = []
                for line in lines:
                    parts = line.split(' -> ')
                    if len(parts) >= 2:
                        tokens = tokenizer.tokenize(parts[1].strip().lower())
                        feats.append((word_feats(tokens), parts[0]))
                print("Total : %s" % (len(feats),))

                cutoff = int(len(feats)*0.1)
                trainfeats, testfeats = feats[cutoff:], feats[:cutoff]
                print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

                classifier = MaxentClassifier.train(trainfeats, algorithm='iis', trace=0, max_iter=10)
                print('accuracy: %s' % (nltk.classify.util.accuracy(classifier, testfeats),))
                classifier.show_most_informative_features()
                # Pickle protocol 1 is a binary format, so the file must be
                # opened in binary mode; text mode can corrupt it on platforms
                # that translate newlines.
                with open(classifier_path, "wb") as fh:
                    cPickle.dump(classifier, fh, 1)
            else:
                with open(classifier_path, "rb") as fh:
                    classifier = cPickle.load(fh)
            self.classifier = classifier
            logger.info("Initialized SentimentClassifier instance..")
        except Exception as e:
            logger.exception(e)
            # Bare raise keeps the original traceback.
            raise
开发者ID:Rahulaswani,项目名称:TweetSherlock,代码行数:39,代码来源:sentimentClassifier.py


示例19: evaluate_classifier

def evaluate_classifier(featx,collocationFunc):
    """Train an IIS MaxentClassifier on movie reviews and return its evaluation metrics.

    At most 400 'neg' and 400 'pos' reviews are used; the first three
    quarters of each go to training and the remainder to testing. The
    returned dict holds the model, the training data, accuracy, and
    precision/recall/F-score for both labels.
    """
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')

    def _featurize(fileids, label):
        return [(featx(movie_reviews.words(fileids=[f]),collocationFunc), label) for f in fileids]

    negfeats = _featurize(negids, 'neg')
    posfeats = _featurize(posids, 'pos')

    # Cap each class at 400 examples before splitting 3:1.
    neg_limit = min(len(negfeats),400)
    pos_limit = min(len(posfeats),400)
    negcutoff = int(neg_limit*3/4)
    poscutoff = int(pos_limit*3/4)

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:neg_limit] + posfeats[poscutoff:pos_limit]

    classifier = MaxentClassifier.train(trainfeats,algorithm='IIS',max_iter=3)
    print(classifier)
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    print(classifier)
    # refsets: indices by gold label; testsets: indices by predicted label.
    for index, (feats, gold) in enumerate(testfeats):
        refsets[gold].add(index)
        predicted = classifier.classify(feats)
        testsets[predicted].add(index)

    classifier.show_most_informative_features()
    evaluationMetrics = {
        'model': classifier,
        'trainingData': trainfeats,
        'accuracy': nltk.classify.util.accuracy(classifier, testfeats),
        'posPrec': nltk.precision(refsets['pos'], testsets['pos']),
        'posRecall': nltk.recall(refsets['pos'], testsets['pos']),
        'posF_Score': nltk.f_measure(refsets['pos'], testsets['pos']),
        'negPrec': nltk.precision(refsets['neg'], testsets['neg']),
        'negRecall': nltk.recall(refsets['neg'], testsets['neg']),
        'negF_Score': nltk.f_measure(refsets['neg'], testsets['neg']),
    }
    return evaluationMetrics
开发者ID:alokkumary2j,项目名称:Sentiment-Analysis-Using-Python-NLTK,代码行数:38,代码来源:MaxEntSentimentAnalysis.py


示例20: getClassifier

def getClassifier(tweetfile):
    """Build and return a classifier trained on the n-grams derived from ``tweetfile``.

    The file is loaded and prepared, categorized, and turned into n-grams
    with ``collectNGrams`` using ``degreesToUse``. The n-gram lists of all
    categories are concatenated into one training list. ``classMode``
    selects the classifier: ``'naive bayes'`` or ``'max ent'``.

    Raises:
        ValueError: if ``classMode`` is neither of the supported values.
    """
    print("Loading content & preparing text")
    content = prepText(loadFile(tweetfile))
    print("Categorizing contents")
    categorized = prepClassifications(content)
    print("Deriving NGrams")
    NGrammized = collectNGrams(categorized,degreesToUse)
    print("Compiling Results")
    readyToSend = []
    for category in NGrammized:
        readyToSend += NGrammized[category]

    print("Attempting Classification")
    if classMode == 'naive bayes':
        from nltk.classify import NaiveBayesClassifier
        classifier = NaiveBayesClassifier.train(readyToSend)
    elif classMode == 'max ent':
        from nltk.classify import MaxentClassifier
        classifier = MaxentClassifier.train(readyToSend)
    else:
        # Previously an unknown mode left ``classifier`` unbound and failed
        # later with an UnboundLocalError; fail here with a clear message.
        raise ValueError("Unsupported classMode: %r" % (classMode,))

    print("")
    # NOTE(review): the features are shown twice (n=200, then the default
    # count), as in the original -- confirm both calls are wanted.
    classifier.show_most_informative_features(n=200)
    classifier.show_most_informative_features()
    return classifier
开发者ID:jschlitt84,项目名称:TwitterGIStudy,代码行数:24,代码来源:TweetMatch.py



注:本文中的nltk.classify.MaxentClassifier类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python classify.NaiveBayesClassifier类代码示例发布时间:2022-05-27
下一篇:
Python classify.accuracy函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap