This article collects typical usage examples of the utility.load_data function in Python. If you are wondering exactly how to use load_data, how to call it, or what real-world examples of it look like, the hand-picked code samples below may help.
The following presents 20 code examples of the load_data function, sorted by popularity by default.
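Note that load_data is not a standard-library function: each project below defines its own helper under that name. In the Kaggle-style projects it takes a dataset split ("training"/"test") and a feature-set name and returns a pandas DataFrame; in others it takes CSV paths; in the IRC-bot plugins it takes a key plus a default value. As a rough orientation only, a minimal sketch of the DataFrame flavour might look like the following. The data/ path layout, the pickle file format, and the save_data counterpart are assumptions for illustration, not code from any of the listed projects.

import os
import pandas as pd

def load_data(split, name):
    """Load a previously saved DataFrame for the given split ("training"/"test") and feature set (assumed layout)."""
    path = os.path.join("data", "{}_{}.pkl".format(split, name))
    return pd.read_pickle(path)

def save_data(df, split, name):
    """Counterpart of load_data: persist a DataFrame so a later pipeline stage can reload it (assumed layout)."""
    path = os.path.join("data", "{}_{}.pkl".format(split, name))
    df.to_pickle(path)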
Example 1: main
def main():
    revision = 1

    print("Loading the classifier")
    classifier = utility.load_model("train_rtext_rev{}".format(revision))

    print("Reading in the training data")
    train = utility.load_data("training", "rtext")

    print("Predicting the rest of the training data")
    pred = np.ravel(classifier.predict(list(train['rtext_bcat'])))
    score = utility.rmsle_log(pred, train['votes_useful_log'])
    print("Score:", score)

    print("Writing out new training data")
    del train['rtext_bcat']
    train['votes_useful_log_rtextpred_sgd'] = pd.Series(pred, index=train.index)
    utility.save_data(train, "training", "rtext_sgd_rev{}".format(revision))

    print("Reading in the test data")
    test = utility.load_data("test", "rtext")
    tepred = np.ravel(classifier.predict(list(test['rtext_bcat'])))

    print("Writing out new test data")
    del test['rtext_bcat']
    test['votes_useful_log_rtextpred_sgd'] = pd.Series(tepred, index=test.index)
    utility.save_data(test, "test", "rtext_sgd_rev{}".format(revision))

    test['votes'] = pd.Series(np.exp(tepred) + 1, index=test.index)

    print("Writing out a new submission file")
    utility.write_submission(test, "rtextsgd_sub_rev{}".format(revision))
Developer: mrphilroth, Project: kaggle-yelp, Lines of code: 31, Source file: predict_rtext_sgd.py
Example 2: main
def main():
    print("Loading the classifier")
    classifier = utility.load_model("fullsgd_model_rev{}".format(revision))

    print("Reading in the training data")
    train = utility.load_data("training", "finalinput")
    truth = train['votes_useful_log']
    del train['votes_useful_log']

    print("Predicting the training data")
    logpred = np.ravel(classifier.predict(train.values[:, 1:]))
    score = utility.rmsle_log(logpred, truth)
    print("Score:", score)

    print("Reading in the test data")
    test = utility.load_data("test", "finalinput")
    del test['votes_useful_log']

    print("Predicting the test data")
    logpred = np.ravel(classifier.predict(test.values[:, 1:]))
    pred = np.exp(np.array(logpred, dtype=np.float64)) - 1
    test['votes'] = pred

    print("Writing out a new submission file")
    utility.write_submission(test, "fullsgd_sub_rev{}.csv".format(revision))
Developer: mrphilroth, Project: kaggle-yelp, Lines of code: 25, Source file: predict.py
Example 3: main
def main():
    # load data
    df = load_data('../../assignment10_data/restaurants.csv', ['CAMIS', 'BORO', 'GRADE', 'GRADE DATE'])
    df = clean_data(df)  # clean data

    # question 4
    sum_nyc, sum_boro = grade_sum(df)  # calculate the sum of test_grade in NYC and in each borough
    print('The sum of test_grade in NYC is: {} \n'.format(sum_nyc))
    print('The sum of test_grade in each borough is: \n {}'.format(sum_boro))

    # question 5
    grade_overtime_plot(df, 'nyc')  # grade-over-time plot for NYC
    # grade-over-time plot for each borough
    for borough in ['BRONX', 'BROOKLYN', 'MANHATTAN', 'QUEENS', 'STATEN ISLAND']:
        df_boro = df[df['BORO'] == borough]
        grade_overtime_plot(df_boro, borough.lower())

    # question 6
    df1 = load_data('../../assignment10_data/restaurants.csv', ['CAMIS', 'CUISINE DESCRIPTION'])
    type_name = get_top_10_nyc(df1)
    df2 = load_data('../../assignment10_data/restaurants.csv', ['CAMIS', 'CUISINE DESCRIPTION', 'GRADE', 'GRADE DATE'])
    df2 = clean_data(df2)
    df2 = df2[df2['CUISINE DESCRIPTION'].isin(type_name)]
    df_sum = top_10_grade_overtime(df2, type_name)  # calculate the score over time for each restaurant type
    top_10_plot(df_sum)  # score-over-time plot
    top_10_colormap(df_sum)  # plot the correlation between any two restaurant types in NYC as a color map
Developer: ariesyi329, Project: assignment10, Lines of code: 27, Source file: assignment10.py
Example 4: main
def main():
    """
    Present the results of this assignment. Users are asked whether they want to see:
    1) Income distribution across all countries for a given year:
       the user inputs a year from 1800 to 2012; results are saved as a .png file.
    2) Income distribution by region in recent years:
       the user inputs the first year, the last year, and a year gap, and selects a plot type
       (boxplots or histograms); results are saved as a .pdf file.
    """
    # load countries and income data
    countries = load_data('countries.csv')
    income = load_data('indicator gapminder gdp_per_capita_ppp.csv')
    # transform the income data set
    income = trans_data(income)
    try:
        while raw_input('To see income distribution across all countries? (y/n) ') == 'y':
            try:
                year = raw_input('Which year? ')  # select a year
                income_distr(income, year)
            except:
                print('Please input a year from 1800 to 2012')
        while raw_input('To see income distribution by region in recent years? (y/n) ') == 'y':
            try:
                from_year = int(raw_input('From which year? '))  # input the first year
                to_year = int(raw_input('To which year? '))      # input the last year
                year_gap = int(raw_input('Year gap? '))          # input a year gap
                pltype = raw_input('Plot type: boxplots or histograms? (b/h) ')  # select a plot type
                if pltype == 'b':
                    # create a pdf file to save the plots
                    pp = PdfPages('results/Income by region from {0} to {1}_boxplot.pdf'.format(from_year, to_year))
                    for i in xrange(from_year, to_year + 1, year_gap):
                        fig = income_region(1, str(i))
                        pp.savefig(fig)
                elif pltype == 'h':
                    pp = PdfPages('results/Income by region from {0} to {1}_hist.pdf'.format(from_year, to_year))
                    for i in xrange(from_year, to_year + 1, year_gap):
                        fig = income_region(0, str(i))
                        plt.suptitle('{}'.format(i))
                        pp.savefig(fig)
                pp.close()  # close the pdf file
            except:
                print('please input years from 1800 to 2012 and try again!')
    except KeyboardInterrupt:
        print('Bye!')
        sys.exit()
Developer: ariesyi329, Project: assignment9, Lines of code: 50, Source file: assignment9.py
Example 5: optimal_svm
def optimal_svm(optimal_c):
    """
    Calculate the AUC for the optimal C chosen during model selection.
    """
    # load datasets
    train_X, train_y = load_data('train_X.csv', 'train_y.csv')
    test_X, test_y = load_data('test_X.csv', 'test_y.csv')
    train_X_pca = data_pca(0.95, train_X, train_X)
    test_X_pca = data_pca(0.95, train_X, test_X)
    train_y = np.array(train_y).ravel()
    test_y = np.array(test_y).ravel()
    # set up the model with the optimal C
    my_svm = svm.SVC(kernel='linear', C=optimal_c, class_weight='auto')
    predicted_y = my_svm.fit(train_X_pca, train_y).decision_function(test_X_pca)
    fpr, tpr, tr = roc_curve(test_y, predicted_y)
    print(auc(fpr, tpr))
Developer: LEONOB2014, Project: DS-GA1001-Project, Lines of code: 18, Source file: svm_testing.py
Example 6: run
def run(train_file, test_file, output_file):
    train, labels, test = utils.load_data(train_file, test_file)
    clf = XGBoost(max_iterations=500, max_depth=12, min_child_weight=4.9208250938262745,
                  row_subsample=.9134478530382129, min_loss_reduction=.5132278416508804,
                  column_subsample=.730128689911957, step_size=.1)
    clf.fit(train, labels)
    predictions = clf.predict_proba(test)
    utils.save_prediction(output_file, predictions)
Developer: shqyking, Project: BigDataProject, Lines of code: 9, Source file: xgboost.py
Example 7: main
def main():
    revision = 4

    print("Loading the classifier")
    classifier = utility.load_model("train_rtext_rev{}".format(revision))

    print("Reading in the training data")
    train = utility.load_data("training", "rtext")

    print("Predicting the rest of the training data")
    bunch = 50000
    pred = np.zeros(len(train))
    for ibunch in range(int(len(train) / bunch)):
        beg = ibunch * bunch
        end = (ibunch + 1) * 50000
        mtrain = train.ix[beg:end - 1]
        mpred = np.ravel(classifier.predict(list(mtrain['rtext_bcat'])))
        pred[beg:end] = mpred
    beg = int(len(train) / bunch) * bunch
    mtrain = train.ix[beg:]
    mpred = np.ravel(classifier.predict(list(mtrain['rtext_bcat'])))
    pred[beg:] = mpred

    score = utility.rmsle_log(pred, train['votes_useful_log'])
    print("Score:", score)

    print("Writing out new training data")
    del train['rtext_bcat']
    train['votes_useful_log_rtextpred'] = pd.Series(pred, index=train.index)
    utility.save_data(train, "training", "rtext_rev{}".format(revision))

    print("Reading in the test data")
    test = utility.load_data("test", "rtext")
    tepred = np.ravel(classifier.predict(list(test['rtext_bcat'])))

    print("Writing out new test data")
    del test['rtext_bcat']
    test['votes_useful_log_rtextpred'] = pd.Series(tepred, index=test.index)
    utility.save_data(test, "test", "rtext_rev{}".format(revision))

    test['votes'] = pd.Series(np.exp(tepred) + 1, index=test.index)

    print("Writing out a new submission file")
    utility.write_submission(test, "rtextrf_sub_rev{}.csv".format(revision))
Developer: mrphilroth, Project: kaggle-yelp, Lines of code: 44, Source file: predict_rtext.py
Example 8: main
def main():
    print("Reading in the training data")
    train = utility.load_data("training", "finalinput")
    truth = np.ravel(np.array(train['votes_useful_log']))
    del train['votes_useful_log']

    print("Extracting features and training review text model")
    classifier = get_pipeline()
    classifier.fit(train.values[:, 1:], np.array(truth))

    print("Saving the classifier")
    utility.save_model(classifier, "fullsgd_model_rev{}".format(revision))
Developer: mrphilroth, Project: kaggle-yelp, Lines of code: 12, Source file: train.py
Example 9: main
def main():
    # load datasets
    train_X, train_Y = load_data('train_X.csv', 'train_y.csv')
    train_X_pca = data_pca(0.95, train_X, train_X)
    train = train_X_pca
    train['Y'] = train_Y
    # set a list of candidate values for the hyperparameter C
    c = [10**i for i in range(-9, 2)]
    # conduct cross-validation and return the AUCs in each sample for each C
    aucs = xValSVM(train, 'Y', 5, c)
    # calculate the average and standard error of the AUC for each C
    avg, stderr = avg_stderr(aucs, c)
    # plot the cross-validation results
    plotxValSVM(avg, stderr, c)
Developer: LEONOB2014, Project: DS-GA1001-Project, Lines of code: 15, Source file: svm_val.py
Example 10: main
def main():
    revision = 4

    print("Reading in the training data")
    train = utility.load_data("training", "rtext")
    inds = random.sample(range(len(train)), 100000)
    mtrain = train.ix[inds]

    print("Extracting features and training review text model")
    classifier = get_pipeline()
    classifier.fit(list(mtrain['rtext_bcat']),
                   list(mtrain['votes_useful_log']))

    print("Saving the classifier")
    utility.save_model(classifier, "train_rtext_rev{}".format(revision))
Developer: mrphilroth, Project: kaggle-yelp, Lines of code: 15, Source file: train_rtext.py
Example 11: main
def main():
    trabus = utility.load_data("training", "business")
    tesbus = utility.load_data("test", "business")
    bus = pd.concat((trabus, tesbus))

    for cat in delbuscats:
        if hasattr(bus, cat): del bus[cat]

    bus['procbcat'] = pd.Series(map(process_bcat, bus['categories']), bus.index)
    del bus['categories']

    for s in ["training", "test"]:
        rev = utility.load_data(s, "review")
        for cat in delrevcats:
            if hasattr(rev, cat): del rev[cat]
        if hasattr(rev, 'votes_useful'):
            rev['votes_useful_log'] = np.log(rev.votes_useful + 1)
        rev = pd.merge(rev, bus, 'inner')
        rev['rtext_bcat'] = rev['text'] + rev['procbcat']
        del rev['procbcat']
        del rev['text']
        utility.save_data(rev, s, 'rtext')
Developer: mrphilroth, Project: kaggle-yelp, Lines of code: 24, Source file: preprocess_rtext.py
Example 12: printPOS
def printPOS(pos_words):
    # pos_words is a list of (word, tag) pairs
    s = ""
    t = ""
    for p in pos_words:
        l = len(p[0]) if len(p[0]) > len(p[1]) else len(p[1])
        s = s + p[0].rjust(l) + ' '
        t = t + p[1].rjust(l) + ' '
    print('-----------')
    print(s)
    print(t)
    print("")

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: python showTaggedSentences.py <input file>")
        sys.exit(0)
    qaTests = load_data(sys.argv[1])
    showAllTaggedSentences(qaTests)
Developer: wangxu724, Project: NLPproject, Lines of code: 26, Source file: showTaggedSentences.py
Example 13: mask_load
def mask_load(self):
    self.url_masks = utility.load_data("urlmasks", {})
Developer: Merola, Project: pynik, Lines of code: 2, Source file: title_reader.py
Example 14: load_urls
def load_urls(self):
    self.url_lists = utility.load_data("urls", {})
Developer: Merola, Project: pynik, Lines of code: 2, Source file: title_reader.py
Example 15: on_load
def on_load(self):
    self.id_directory = utility.load_data('schema_id', {})
    self.id_presets = utility.load_data('schema_fav', {})
Developer: IcEBnd, Project: pyirkbot, Lines of code: 3, Source file: ical_parser.py
Example 16: on_load
def on_load(self):
    self.favorites = utility.load_data("favorites", {})
Developer: raek, Project: pynik, Lines of code: 2, Source file: favorites.py
Example 17: train_weakgbm
import utility
import numpy as np
import pandas as pd
import multiprocessing
from sklearn.grid_search import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

nweakgbms = 18
dftest = utility.load_data('test')
dftrain = utility.load_data('train')
dftestpreds = pd.DataFrame(dftest.id)
dftrainpreds = pd.DataFrame({'id': np.arange(len(dftrain)),
                             'ACTION': dftrain.ACTION})
y = np.array(dftrain.ACTION)
del dftrain['ACTION']
X = np.array(dftrain)
Xtest = np.array(dftest)[:, 1:]

def train_weakgbm(i):
    cols = np.ones(9)
    cols[i % X.shape[1]] = 0
    smallX = np.compress(cols, X, axis=1)
    X_cvtrain, X_cvtest, y_cvtrain, y_cvtest = train_test_split(
Developer: jamesjohnson92, Project: kaggle-amazonaccess, Lines of code: 31, Source file: weakgbms.py
Example 18: on_load
def on_load(self):
    self.location = utility.load_data('festern_bbq', "okänt")
Developer: IcEBnd, Project: pyirkbot, Lines of code: 2, Source file: festern_bbq.py
Example 19: on_load
def on_load(self):
    self.__aliases = utility.load_data("stockaliases", {})
Developer: osund, Project: pynik, Lines of code: 2, Source file: stock.py
Example 20: on_load
def on_load(self):
    self.places = utility.load_data("postnr_addresses", {})
Developer: IcEBnd, Project: pyirkbot, Lines of code: 2, Source file: postnr.py
Note: The utility.load_data examples in this article were collected by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by various developers; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.