This article collects and summarizes typical usage examples of the Python function tester.test_classifier. If you have been wondering what test_classifier does and how to use it, the curated code examples below should help.
The following section presents 20 code examples of test_classifier, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
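All of these snippets come from variants of the Udacity "Identify Fraud from Enron Email" final project, where tester.py defines test_classifier(clf, dataset, feature_list, folds=1000). Before diving into the examples, here is a minimal sketch of what such a function does, inferred only from what the snippets below reveal (stratified shuffle-split cross-validation, a folds argument, printed precision and recall). It is written against the modern scikit-learn API and is an assumption-laden approximation, not the actual tester.py source:

from sklearn.model_selection import StratifiedShuffleSplit

def test_classifier_sketch(clf, features, labels, folds=1000):
    # Assumption: the real test_classifier takes (clf, dataset, feature_list, folds)
    # and extracts features/labels itself via featureFormat/targetFeatureSplit
    # (see Examples 18-20); here they are passed in directly for brevity.
    cv = StratifiedShuffleSplit(n_splits=folds, test_size=0.1, random_state=42)
    tp = fp = fn = 0
    for train_idx, test_idx in cv.split(features, labels):
        clf.fit([features[i] for i in train_idx], [labels[i] for i in train_idx])
        predictions = clf.predict([features[i] for i in test_idx])
        for pred, truth in zip(predictions, [labels[i] for i in test_idx]):
            tp += int(pred == 1 and truth == 1)   # true positives
            fp += int(pred == 1 and truth == 0)   # false positives
            fn += int(pred == 0 and truth == 1)   # false negatives
    precision = float(tp) / max(tp + fp, 1)
    recall = float(tp) / max(tp + fn, 1)
    print("precision: {:.3f}  recall: {:.3f}".format(precision, recall))

Note that several examples pass a GridSearchCV object straight into test_classifier (Examples 8, 9, and 12): since the function calls fit internally, the grid search runs inside each evaluation and best_params_ is available afterwards.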
Example 1: tune_classifier

def tune_classifier(classifier, clf_params, max_features):
    ### features_list is a list of strings, each of which is a feature name.
    ### The first feature must be "poi".
    features_list = get_feature_list()

    ### Create new feature(s)
    ### Store to my_dataset for easy export below.
    my_dataset = get_data()

    ### Extract features and labels from dataset for local testing
    features_list = features_list[0:max_features + 1]
    data, labels, features = get_features_and_labels(my_dataset, features_list)

    ### Tune your classifier to achieve better than .3 precision and recall
    ### using our testing script. Check the tester.py script in the final project
    ### folder for details on the evaluation method, especially the test_classifier
    ### function. Because of the small size of the dataset, the script uses
    ### stratified shuffle split cross validation. For more info:
    ### http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.StratifiedShuffleSplit.html
    from sklearn.cross_validation import train_test_split
    features_train, features_test, labels_train, labels_test = \
        train_test_split(features, labels, test_size=0.3, random_state=42)

    # Grid-search for the best estimator, then evaluate it with tester.py
    clf = GridSearchCV(classifier, param_grid=clf_params, scoring=make_scorer(f1_score))
    clf.fit(features_train, labels_train)
    clf_final = clf.best_estimator_
    print "The best estimator = ", clf_final
    test_classifier(clf_final, my_dataset, features_list, 1000)

Author: tommysiu | Project: udacity-data-analyst | Lines: 30 | Source: tuning.py

Example 2: RandomForest

def RandomForest(feature_list, dataset):
    from sklearn.ensemble import RandomForestClassifier
    clf = RandomForestClassifier()
    # test_classifier fits clf internally, so feature_importances_ is available afterwards
    test_classifier(clf, dataset, feature_list)
    imp = clf.feature_importances_
    print_importance(feature_list, imp)
    return clf

Author: MengoDB | Project: Identify-Fraud-from-Enron-Emails | Lines: 7 | Source: poi_id.py

Example 3: GaussianNB

def GaussianNB(feature_list, dataset):
    # the local import shadows the enclosing function name inside this scope
    from sklearn.naive_bayes import GaussianNB
    clf = GaussianNB()
    test_classifier(clf, dataset, feature_list)
    return clf

Author: MengoDB | Project: ud120-projects | Lines: 7 | Source: poi_bkp.py

Example 4: decisionTree

def decisionTree(feature_list, dataset):
    from sklearn import tree
    clf = tree.DecisionTreeClassifier()
    test_classifier(clf, dataset, feature_list)
    print clf.feature_importances_
    return clf

Author: MengoDB | Project: ud120-projects | Lines: 7 | Source: poi_bkp.py

Example 5: iterPipe

def iterPipe(num1, num2):
    # Evaluate a DecisionTreeClassifier for each min_samples_split in [num1, num2]
    for i in range(num1, num2 + 1):
        # estimators = [('scaling', StandardScaler()), ('reduce_dim', PCA()), ('dtc', DTC(min_samples_split=i*2))]
        # estimators = [('reduce_dim', PCA(n_components=2)), ('dtc', DTC(min_samples_split=i))]
        # clfIter = Pipeline(estimators)
        # clfIter.set_params(reduce_dim__n_components=3)
        clfIter = DTC(min_samples_split=i)
        test_classifier(clfIter, my_dataset, features_list)

Author: Faylfire | Project: identifying_enron_fraud_project_5_fang_lu | Lines: 8 | Source: poi_id.py

Example 6: KNN

def KNN(feature_list, dataset):
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    knn = KNeighborsClassifier()
    # scale features first, since k-NN is distance-based
    estimators = [('scale', StandardScaler()), ('knn', knn)]
    clf = Pipeline(estimators)
    test_classifier(clf, dataset, feature_list)

Author: yielder | Project: identifying-fraud-from-enron-email | Lines: 9 | Source: poi_id.py

Example 7: setup_and_test

def setup_and_test(my_dataset, features_list, classifier):
    # Dump classifier and features list, so we can test them
    dump_classifier_and_data(classifier, my_dataset, features_list)
    # Load the classifier, dataset, and feature_list back from disk
    clf, dataset, feature_list = load_classifier_and_data()
    # Run testing script
    test_classifier(clf, dataset, feature_list)
    return

Author: joashxu | Project: enron | Lines: 10 | Source: utils.py

Example 8: tuneKmeans

def tuneKmeans(feature_list, dataset):
    from sklearn.cluster import KMeans
    from sklearn.grid_search import GridSearchCV
    km_clf = KMeans(n_clusters=2, tol=0.001)
    parameters = {'n_clusters': (2, 10)}
    clf = GridSearchCV(km_clf, parameters, scoring='recall')
    test_classifier(clf, dataset, feature_list)
    print '###best_params'
    print clf.best_params_
    return clf.best_estimator_

Author: MengoDB | Project: ud120-projects | Lines: 11 | Source: poi_bkp.py

Example 9: tuneDT

def tuneDT(feature_list, dataset):
    from sklearn.grid_search import GridSearchCV
    from sklearn import tree
    tree_clf = tree.DecisionTreeClassifier()
    parameters = {'criterion': ('gini', 'entropy'),
                  'splitter': ('best', 'random')}
    clf = GridSearchCV(tree_clf, parameters, scoring='recall')
    test_classifier(clf, dataset, feature_list)
    print '###best_params'
    print clf.best_params_

Author: yielder | Project: identifying-fraud-from-enron-email | Lines: 11 | Source: poi_id.py

Example 10: detect_poi

def detect_poi():
    ### Load the dictionary containing the dataset
    data_dict = pickle.load(open("final_project_dataset.pkl", "r"))

    ### Task 1: Remove outliers
    data_dict.pop('TOTAL', 0)

    ### Task 2: Select what features:
    ### 'stk_pay_ratio', 'to_poi_ratio', 'from_poi_ratio', 'bonus_salary_ratio'
    ### features_list is a list of strings, each of which is a feature name.
    ### The first feature must be "poi".
    my_dataset = data_dict
    stk_pay_ratio(my_dataset)
    from_poi_ratio(my_dataset)
    to_poi_ratio(my_dataset)
    bonus_salary_ratio(my_dataset)

    ### Task 3: Feature selection
    ### Generate a set of 15 feature lists from these 4 features.
    ### This way, all possible combinations of these features are tested.
    all_features_list = fList_set()

    ### Because of the small size of the dataset, the script uses stratified
    ### shuffle split cross validation in tester.py
    metrics = []
    clf = GaussianNB()
    ### ptest uses stratified shuffle split cross validation and calculates
    ### the precision for every feature list
    for i in range(0, 15):
        metrics.append(ptest(clf, my_dataset, all_features_list[i]))

    ### Go for the feature list that produces the best precision.
    ### For this dataset only, it is harder to get a high precision.
    best = np.array(metrics).argmax()

    ### Run test_classifier to print evaluation metrics to console
    test_classifier(clf, my_dataset, all_features_list[best])

    ### Now use the same feature list to run the decision tree classifier
    features_list = all_features_list[best]

    ### Task 4: Try a variety of classifiers
    samples_split_values = [2, 4]
    samples_leaf_values = [1, 2]
    for split in samples_split_values:
        for leaf in samples_leaf_values:
            clf = tree.DecisionTreeClassifier(min_samples_split=split,
                                              min_samples_leaf=leaf)
            test_classifier(clf, my_dataset, features_list)
            print_feature_importances(features_list, clf)

    ### Choose best classifier and feature set
    clf = GaussianNB()

    ### Dump classifier, dataset, and features_list
    dump_classifier_and_data(clf, my_dataset, features_list)

Author: RaphaelTam | Project: Enron_Bad_Guys | Lines: 54 | Source: poi.id.py

Example 11: explore_scores

def explore_scores():
    # Brute-force sweep over feature sets and KNeighborsClassifier parameters
    for n in features:
        for c in n_neighbor:
            for d in weights:
                for e in algorithm:
                    for f in leaf_size:
                        for g in p:
                            for h in metric:
                                feature = features_select(n)
                                pipeline = Pipeline([('normalization', scaler),
                                                     ('classifier', KNeighborsClassifier(n_neighbors=c, weights=d, algorithm=e,
                                                                                         leaf_size=f, p=g, metric=h))])
                                test_classifier(pipeline, enron_data, feature)

Author: BlaneG | Project: Udacity_Intro_machine_learning | Lines: 14 | Source: poi_id.py

Example 12: tuneKNN

def tuneKNN(feature_list, dataset):
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.grid_search import GridSearchCV
    knn = KNeighborsClassifier()
    # scale features before k-NN
    estimators = [('scale', StandardScaler()), ('knn', knn)]
    pipeline = Pipeline(estimators)
    parameters = {'knn__n_neighbors': [1, 8],
                  'knn__algorithm': ('ball_tree', 'kd_tree', 'brute', 'auto')}
    clf = GridSearchCV(pipeline, parameters, scoring='recall')
    test_classifier(clf, dataset, feature_list)
    print '###best_params'
    print clf.best_params_

Author: yielder | Project: identifying-fraud-from-enron-email | Lines: 15 | Source: poi_id.py

Example 13: getRF

def getRF():
    print "==============="
    print "RandomForests"
    print "==============="
    for score in scores:
        print score
        print
        #parameters = {'n_estimators':range(10, 150, 10), 'criterion':['gini', 'entropy'], 'min_samples_split':range(2, 8, 2)}
        parameters = {'rf__n_estimators': range(10, 150, 10), 'rf__criterion': ['gini', 'entropy'],
                      'rf__min_samples_split': range(2, 8, 2), 'selector__k': range(3, 22, 1)}
        gs = grid_search.GridSearchCV(rf_pipe, parameters, scoring=score, cv=cv)
        gs.fit(features, labels)
        # This is the model you pass to tester.py
        clf = gs.best_estimator_
        print " "
        print "Optimal Model - by Grid Search"
        print clf
        print " "
        best_parameters = gs.best_estimator_.get_params()
        print " "
        print "Best Parameters - by Grid Search"
        print best_parameters
        print " "
        labels_pred = gs.predict(features)
        # Print results (the grid search score)
        print "Grid Search Classification report:"
        print " "
        print classification_report(labels, labels_pred)
        print ' '
        # Print results (the tester.py score)
        print "tester.py Classification report:"
        print " "
        test_classifier(clf, my_dataset, features_list)
        print " "
        print

Author: abdelrahmansaud | Project: Identifying-Fraud-from-Enron-Email | Lines: 48 | Source: poi_id.py

Example 14: getKNN

def getKNN():
    print "==============="
    print "KNeighborsClassifier"
    print "==============="
    for score in scores:
        print score
        print
        #parameters = {'n_neighbors':range(2, 10, 2), 'weights':['distance', 'uniform'], 'metric':['minkowski', 'euclidean']}
        parameters = {'knn__n_neighbors': range(2, 10, 2), 'knn__weights': ['distance', 'uniform'],
                      'knn__metric': ['minkowski', 'euclidean'], 'selector__k': range(3, 20, 1)}
        gs = grid_search.GridSearchCV(knn_pipe, parameters, scoring=score, cv=cv)
        gs.fit(features, labels)
        # This is the model you pass to tester.py
        clf = gs.best_estimator_
        print " "
        print "Optimal Model - by Grid Search"
        print clf
        print " "
        best_parameters = gs.best_estimator_.get_params()
        print " "
        print "Best Parameters - by Grid Search"
        print best_parameters
        print " "
        labels_pred = gs.predict(features)
        # Print results (the grid search score)
        print "Grid Search Classification report:"
        print " "
        print classification_report(labels, labels_pred)
        print ' '
        # Print results (the tester.py score)
        print "tester.py Classification report:"
        print " "
        test_classifier(clf, my_dataset, features_list)
        print " "
        print

Author: abdelrahmansaud | Project: Identifying-Fraud-from-Enron-Email | Lines: 48 | Source: poi_id.py

Example 15: getAda

def getAda():
    print "==============="
    print "AdaBoost"
    print "==============="
    for score in scores:
        print score
        print
        #parameters = {'n_estimators':range(50, 100, 1), 'learning_rate':[x * 0.01 for x in range(100, 160, 1)]}
        parameters = {'ada__n_estimators': range(1, 100, 20), 'ada__learning_rate': [x * 0.01 for x in range(100, 160, 10)],
                      'selector__k': range(3, 22, 1)}
        gs = grid_search.GridSearchCV(ada_pipe, parameters, scoring=score, cv=cv)
        gs.fit(features, labels)
        # This is the model you pass to tester.py
        clf = gs.best_estimator_
        print " "
        print "Optimal Model - by Grid Search"
        print clf
        print " "
        best_parameters = gs.best_estimator_.get_params()
        print " "
        print "Best Parameters - by Grid Search"
        print best_parameters
        print " "
        labels_pred = gs.predict(features)
        # Print results (the grid search score)
        print "Grid Search Classification report:"
        print " "
        print classification_report(labels, labels_pred)
        print ' '
        # Print results (the tester.py score)
        print "tester.py Classification report:"
        print " "
        test_classifier(clf, my_dataset, features_list)
        print " "
        print

Author: abdelrahmansaud | Project: Identifying-Fraud-from-Enron-Email | Lines: 48 | Source: poi_id.py

Example 16: getSVC

def getSVC():
    print "==============="
    print "SVC"
    print "==============="
    for score in scores:
        print score
        print
        parameters = {'sv__C': [0.01, 0.1, 1, 500, 1000, 5000, 10000, 50000, 100000], 'sv__kernel': ['linear'],
                      'selector__k': range(3, 22, 1)}  # 'sv__gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 1, 10, 100, 500, 1000]
        gs = grid_search.GridSearchCV(sv_pipe, parameters, scoring=score, cv=cv)
        gs.fit(features, labels)
        # This is the model you pass to tester.py
        clf = gs.best_estimator_
        print " "
        print "Optimal Model - by Grid Search"
        print clf
        print " "
        best_parameters = gs.best_estimator_.get_params()
        print " "
        print "Best Parameters - by Grid Search"
        print best_parameters
        print " "
        labels_pred = gs.predict(features)
        # Print results (the grid search score)
        print "Grid Search Classification report:"
        print " "
        print classification_report(labels, labels_pred)
        print ' '
        # Print results (the tester.py score)
        print "tester.py Classification report:"
        print " "
        test_classifier(clf, my_dataset, features_list)
        print " "
        print

Author: abdelrahmansaud | Project: Identifying-Fraud-from-Enron-Email | Lines: 47 | Source: poi_id.py

Example 17: getNB

def getNB():
    print "==============="
    print "GaussianNB"
    print "==============="
    for score in scores:
        print score
        print
        parameters = {'selector__k': range(3, 22, 1)}
        gs = grid_search.GridSearchCV(nb_pipe, parameters, scoring=score, cv=cv)
        gs.fit(features, labels)
        # This is the model you pass to tester.py
        clf = gs.best_estimator_
        print " "
        print "Optimal Model - by Grid Search"
        print clf
        print " "
        best_parameters = gs.best_estimator_.get_params()
        print " "
        print "Best Parameters - by Grid Search"
        print best_parameters
        print " "
        labels_pred = gs.predict(features)
        # Print results (the grid search score)
        print "Grid Search Classification report:"
        print " "
        print classification_report(labels, labels_pred)
        print ' '
        # Print results (the tester.py score)
        print "tester.py Classification report:"
        print " "
        test_classifier(clf, my_dataset, features_list)
        print " "
        print

Author: abdelrahmansaud | Project: Identifying-Fraud-from-Enron-Email | Lines: 46 | Source: poi_id.py

Example 18: train_test

def train_test():
    data = featureFormat(my_dataset, features_list, sort_keys=True)
    labels, features = targetFeatureSplit(data)
    features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.3, random_state=42)
    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(features_train, labels_train)
    print test_classifier(clf, my_dataset, features_list)

    ### Print feature importances in order
    features_imp = {}
    for i in xrange(len(features_list) - 1):
        features_imp[features_list[1 + i]] = clf.feature_importances_[i]
    pprint(sorted(features_imp.items(), key=operator.itemgetter(1), reverse=True))

Author: nickreinerink | Project: Udacity-P5-Identity-Fraud-from-Enron-Email | Lines: 18 | Source: new_features.py

Example 19: main

def main():
    data_dict = pickle.load(open("final_project_dataset.pkl", "r"))
    my_dataset = data_dict
    my_dataset = AddFeatures(my_dataset)
    # Excluded using discretion.
    Exc1 = ["email_address"]
    # Replaced by creating better versions of the features
    Exc2 = ["to_messages", "from_messages", "from_this_person_to_poi", "from_poi_to_this_person"]
    # Excluded because highly correlated with stronger features
    Exc3 = [
        "deferral_payments",
        "expenses",
        "deferred_income",
        "restricted_stock_deferred",
        "director_fees",
        "long_term_incentive",
        "bonus",
        "total_payments",
        "salary",
        "total_stock_value",
        "restricted_stock",
        "exercised_stock_options",
        "other",
    ]
    exclude = Exc1 + Exc2 + Exc3
    # QueryDataSet(my_dataset)
    # ShowCorrel(my_dataset)
    features_list = next(my_dataset.itervalues()).keys()
    for i in exclude:
        features_list.remove(i)
    features_list.insert(0, features_list.pop(features_list.index("poi")))
    data = featureFormat(my_dataset, features_list, sort_keys=True)
    ### Extract features and labels from dataset for local testing
    labels, features = targetFeatureSplit(data)
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.1, random_state=42, stratify=labels
    )
    # NOTE: one of the following lines must be uncommented so that clf is
    # defined before it is dumped and tested below.
    # clf = TuneSVM(features, labels, features_list)
    # clf = TuneKNN(features, labels, features_list)
    # clf = NoTuneDT(features, labels, features_list)
    # clf = TuneDT(features, labels, features_list)
    features_list.insert(0, "poi")
    dump_classifier_and_data(clf, my_dataset, features_list)
    test_classifier(clf, my_dataset, features_list)

Author: datalord123 | Project: MachineLearning | Lines: 44 | Source: poi_id.py

Example 20: train_and_predict

def train_and_predict(first, second):
    # Trains the model and returns the value of the desired evaluation metric.
    # dt, f1, and my_dataset are globals; return_F1/return_precision are custom
    # keyword arguments, so this project's tester.py evidently extends the
    # stock test_classifier.
    features_list = ["poi", first, second]
    data = featureFormat(my_dataset, features_list, sort_keys=True)
    labels, features = targetFeatureSplit(data)
    from sklearn.naive_bayes import GaussianNB
    from sklearn import tree
    if dt:
        clf = tree.DecisionTreeClassifier()
    else:
        clf = GaussianNB()
    if f1:
        return test_classifier(clf, my_dataset, features_list, return_F1=True)
    else:
        return test_classifier(clf, my_dataset, features_list, return_precision=True)

Author: reinson | Project: UdacityProject4_Identifying-Fraud-from-Enron-Email | Lines: 19 | Source: poi_id.py

Note: the tester.test_classifier examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers, and copyright remains with the original authors; consult each project's license before redistributing or reusing the code. Do not reproduce this compilation without permission.