本文整理汇总了Python中mptracker.patcher.TablePatcher类的典型用法代码示例。如果您正苦于以下问题:Python TablePatcher类的具体用法?Python TablePatcher怎么用?Python TablePatcher使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了TablePatcher类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: committees
def committees(
    cache_name=None,
    throttle=None,
    no_commit=False,
):
    """Scrape committees and patch them into the ``MpCommittee`` table.

    Rows are matched on ``(chamber_id, cdep_id)``.  The patcher runs with
    ``remove=True`` (presumably deleting rows that were not re-added --
    confirm against ``TablePatcher``) and ``autoflush=1000``.

    Args:
        cache_name: Passed through to ``create_session``.
        throttle: Converted to ``float`` when truthy, otherwise passed
            through unchanged to ``create_session``.
        no_commit: When true, roll the transaction back instead of
            committing it.
    """
    from mptracker.scraper.committees import CommitteeScraper

    patcher = TablePatcher(
        models.MpCommittee,
        models.db.session,
        key_columns=['chamber_id', 'cdep_id'],
    )

    http_session = create_session(
        cache_name=cache_name,
        throttle=throttle and float(throttle),
    )
    scraper = CommitteeScraper(http_session)

    with patcher.process(autoflush=1000, remove=True) as add:
        for committee in scraper.fetch_committees():
            add(committee.as_dict(['chamber_id', 'cdep_id', 'name']))

    if no_commit:
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the
        # supported spelling.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()
    else:
        models.db.session.commit()
开发者ID:rdragos,项目名称:mptracker,代码行数:28,代码来源:__init__.py
示例2: get_vote_controversy
def get_vote_controversy(no_commit=False):
    """Load voting-session controversies from the controversy CSV.

    Each CSV line becomes one ``VotingSessionControversy`` row keyed on
    ``voting_session_id``, which is taken from the part of
    ``mptracker_url`` that follows ``/votes/``.

    Args:
        no_commit: When true, roll the transaction back instead of
            committing it.

    Raises:
        IndexError: if a line's ``mptracker_url`` does not contain
            ``/votes/``.
    """
    controversy_patcher = TablePatcher(
        models.VotingSessionControversy,
        models.db.session,
        key_columns=['voting_session_id'],
    )

    with controversy_patcher.process(remove=True) as add_controversy:
        for line in get_gdrive_csv(CONTROVERSY_CSV_KEY):
            voting_session_id = line['mptracker_url'].split('/votes/')[1]
            add_controversy({
                'title': line['title'],
                'status': line['status'],
                'reason': line['motive'],
                'vote_meaning_yes': line['info_da'],
                'vote_meaning_no': line['info_nu'],
                'press_links': line['link_presa'],
                'voting_session_id': voting_session_id,
            })

    if no_commit:
        # ``Logger.warn`` is a deprecated alias of ``warning``.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()
    else:
        models.db.session.commit()
开发者ID:burnerelu,项目名称:mptracker,代码行数:25,代码来源:__init__.py
示例3: get_committee_policy
def get_committee_policy():
    """Assign policy domains to committees from the committee-policy CSV.

    For each CSV row the ``policy`` slug is resolved to a ``PolicyDomain``
    id.  An empty or unknown slug yields ``policy_domain_id=None``, and
    unknown slugs are also logged.  Committees are matched on ``id`` and
    patched with ``create=False``.
    """
    patcher = TablePatcher(
        models.MpCommittee,
        models.db.session,
        key_columns=['id'],
    )

    with patcher.process() as update_committee:
        for row in get_gdrive_csv(COMMITTEE_POLICY_CSV_KEY):
            slug = row['policy']
            policy_id = None
            if slug:
                policy = (
                    models.PolicyDomain.query
                    .filter_by(slug=slug)
                    .first()
                )
                if policy is None:
                    # ``Logger.warn`` is a deprecated alias of ``warning``.
                    logger.warning("Unknown policy domain %r", slug)
                else:
                    policy_id = policy.id

            update_committee(
                dict(id=row['id'], policy_domain_id=policy_id),
                create=False,
            )

    models.db.session.commit()
开发者ID:burnerelu,项目名称:mptracker,代码行数:25,代码来源:__init__.py
示例4: get_proposal_controversy
def get_proposal_controversy():
    """Update proposal controversies from the proposal-controversy CSV.

    The proposal id is the last path segment of each row's
    ``Link MP Tracker`` URL.  Rows with an empty id are skipped.  Records
    are keyed on ``proposal_id`` and the patcher runs with ``remove=True``.

    Raises:
        AssertionError: if a row references a proposal id for which
            ``Proposal.query.get`` returns a falsy value.
    """
    def extract_proposal(url):
        # Everything after the last '/', or the whole string if there is
        # no '/'.
        return url.rsplit('/', 1)[-1]

    controversy_patcher = TablePatcher(
        models.ProposalControversy,
        models.db.session,
        key_columns=['proposal_id'],
    )

    with controversy_patcher.process(remove=True) as add:
        for row in get_gdrive_csv(PROPOSAL_CONTROVERSY_CSV_KEY):
            proposal_id = extract_proposal(row['Link MP Tracker'])
            if not proposal_id:
                continue

            # Raised explicitly rather than via ``assert`` so the check
            # still runs under ``python -O``.
            if not models.Proposal.query.get(proposal_id):
                raise AssertionError(
                    "Unknown proposal id %r" % (proposal_id,))

            add({
                'proposal_id': proposal_id,
                'title': row['Titlu'],
                'reason': row['Motive controversa'],
                'press_links': row['Link presa'],
            })

    models.db.session.commit()
开发者ID:burnerelu,项目名称:mptracker,代码行数:25,代码来源:__init__.py
示例5: update_person_xls
def update_person_xls():
    """Update mandate and person contact data from the minorities CSV.

    Runs two passes: mandates are patched first, keyed on
    ``(year, cdep_number)``, while the per-person payloads are collected.
    The collected payloads are then patched into ``Person``, keyed on
    ``id``.
    """
    from mptracker.scraper.person_xls import read_person_contact

    lookup = models.MandateLookup()
    collected_people = []

    mandate_patcher = TablePatcher(
        models.Mandate,
        models.db.session,
        key_columns=['year', 'cdep_number'],
    )
    with mandate_patcher.process() as patch_mandate:
        for contact in read_person_contact(MINORITIES_CSV_KEY):
            # 'name' is only used for the lookup and must not reach the
            # mandate patcher.
            person_name = contact.pop('name')
            found = lookup.find(
                person_name, contact['year'], contact['cdep_number'])

            payload = contact.pop('person_data')
            payload['id'] = found.person_id
            collected_people.append(payload)

            patch_mandate(contact)

    person_patcher = TablePatcher(
        models.Person,
        models.db.session,
        key_columns=['id'],
    )
    with person_patcher.process() as patch_person:
        for payload in collected_people:
            patch_person(payload)

    models.db.session.commit()
开发者ID:burnerelu,项目名称:mptracker,代码行数:27,代码来源:__init__.py
示例6: assets
def assets(file_path, no_commit=False):
    """Import asset statements parsed from ``file_path``.

    Parsed records are matched to people by normalized name, using the
    people returned by the 2012 mandate query.  They are patched into
    ``AssetStatement`` keyed on ``(person_id, date)``, with the date fixed
    to 2012-11-01.  The record, minus ``person_name``, ``constituency`` and
    ``county``, is stored as ``raw_data``.

    Args:
        file_path: Passed to ``parse_assets``.
        no_commit: When true, roll the transaction back instead of
            committing it.

    Raises:
        KeyError: if a record's normalized person name is not in the
            people map, or an expected record field is missing.
    """
    from mptracker.scraper.assets import parse_assets
    from mptracker.nlp import normalize

    asset_patcher = TablePatcher(
        models.AssetStatement,
        models.db.session,
        key_columns=['person_id', 'date'],
    )

    people_map = {
        normalize(person.name): person.id
        for person in (
            models.Person.query
            .join(models.Person.mandates)
            .filter_by(year=2012)
        )
    }

    with asset_patcher.process(remove=True) as add_asset:
        for record in parse_assets(file_path):
            person_name = normalize(record.pop('person_name'))
            person_id = people_map[person_name]
            del record['constituency']
            del record['county']

            net_worth_eur = (
                record['acct_value']['TOTAL_EUR']
                - record['debt_value']['TOTAL_EUR']
                + record['invest_value']['TOTAL_EUR']
                + record['valuables_value']['TOTAL_EUR']
            )
            realty_count = (
                record['realty_apartment_count']
                + record['realty_business_count']
                + record['realty_house_count']
            )
            year_income_eur = (
                record['family_income_value']['TOTAL_EUR']
                + record['gift_value']['TOTAL_EUR']
                + record['sales_value']['TOTAL_EUR']
            )

            # The patcher's return value was previously bound to an unused
            # local; it is simply discarded now.
            add_asset({
                'person_id': person_id,
                'date': date(2012, 11, 1),
                'raw_data': record,
                'net_worth_eur': net_worth_eur,
                'land_agri_area': record['land_agri_area'],
                'land_city_area': record['land_city_area'],
                'realty_count': realty_count,
                'vehicle_count': record['vehicle_count'],
                'year_income_eur': year_income_eur,
            })

    if no_commit:
        # ``Logger.warn`` is a deprecated alias of ``warning``.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()
    else:
        models.db.session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:56,代码来源:__init__.py
示例7: position
def position():
    """Load people's positions from two CSV sources.

    Names are resolved with a ``NameSearch`` built from people joined to
    mandates whose ``year`` is 2012.  Positions are keyed on
    ``(person_id, interval, title)`` and the patcher runs with
    ``remove=True``.

    * First CSV (``POSITION_PONTA2_CSV_KEY``): rows with a non-blank
      ``temporary`` field are skipped.  Names with no match or several
      matches are logged and skipped.
    * Second CSV (``POSITION_BIROU_CDEP_CSV_KEY``): each name must match
      exactly one person.  ``", Biroul Permanent"`` is appended to the
      title.

    Raises:
        AssertionError: if a name in the second CSV does not resolve to
            exactly one person.
    """
    name_search = models.NameSearch(
        models.Person.query
        .join(models.Mandate)
        .filter(models.Mandate.year == 2012)
        .all()
    )

    position_patcher = TablePatcher(
        models.Position,
        models.db.session,
        key_columns=['person_id', 'interval', 'title'],
    )

    with position_patcher.process(remove=True) as add_position:
        for row in get_gdrive_csv(POSITION_PONTA2_CSV_KEY):
            if row['temporary'].strip():
                continue

            name = row['name'].strip()
            matches = name_search.find(name)

            if len(matches) == 1:
                [person] = matches
                interval = parse_interval(row['start_date'], row['end_date'])
                add_position({
                    'person_id': person.id,
                    'interval': interval,
                    'title': row['title'],
                    'url': row['url'] or None,
                })
            elif len(matches) > 1:
                # ``Logger.warn`` is a deprecated alias of ``warning``.
                logger.warning("Multiple matches for %r", name)
            else:
                logger.warning("No matches for %r", name)

        for row in get_gdrive_csv(POSITION_BIROU_CDEP_CSV_KEY):
            name = row['name'].strip()
            matches = name_search.find(name)

            # Raised explicitly rather than via ``assert`` so the check
            # still runs under ``python -O``.
            if len(matches) != 1:
                raise AssertionError(
                    "Expected a single match for %r, got %r"
                    % (name, matches))

            [person] = matches
            add_position({
                'person_id': person.id,
                'interval': parse_interval(row['start_date'],
                                           row['end_date']),
                'title': row['title'] + ", Biroul Permanent",
            })

    models.db.session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:53,代码来源:__init__.py
示例8: stop_words
def stop_words():
    """Replace the ``Stopword`` table contents with the stop-words CSV.

    CSV rows are handed to the patcher unchanged, keyed on ``id``, with
    ``remove=True``.
    """
    stopword_patcher = TablePatcher(
        models.Stopword, models.db.session, key_columns=['id'])

    with stopword_patcher.process(remove=True) as patch_row:
        for csv_row in get_gdrive_csv(STOP_WORDS_CSV_KEY):
            patch_row(csv_row)

    models.db.session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:12,代码来源:__init__.py
示例9: policy_domain
def policy_domain():
    """Replace the ``PolicyDomain`` table contents with the policy CSV.

    CSV rows are handed to the patcher unchanged, keyed on ``slug``, with
    ``remove=True``.
    """
    domain_patcher = TablePatcher(
        models.PolicyDomain, models.db.session, key_columns=['slug'])

    with domain_patcher.process(remove=True) as patch_row:
        for csv_row in get_gdrive_csv(POLICY_DOMAIN_CSV_KEY):
            patch_row(csv_row)

    models.db.session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:12,代码来源:__init__.py
示例10: stop_words
def stop_words():
    """Replace the ``Stopword`` table with ASCII-normalized CSV entries.

    Only the ``id`` column of each CSV row is used.  It is passed through
    ``normalize_to_ascii`` before being patched, keyed on ``id``, with
    ``remove=True``.
    """
    from mptracker.nlp import normalize_to_ascii

    stopword_patcher = TablePatcher(
        models.Stopword, models.db.session, key_columns=['id'])

    with stopword_patcher.process(remove=True) as patch_row:
        for csv_row in get_gdrive_csv(STOP_WORDS_CSV_KEY):
            ascii_id = normalize_to_ascii(csv_row['id'])
            patch_row({'id': ascii_id})

    models.db.session.commit()
开发者ID:alexef,项目名称:mptracker,代码行数:13,代码来源:__init__.py
示例11: committee_summaries
def committee_summaries(year=2014):
    """Fetch committee summaries for ``year`` and patch them into the DB.

    Summaries are fetched with ``get_pdf_text=True`` and written to
    ``CommitteeSummary``, keyed on ``pdf_url``.

    Args:
        year: Passed to ``SummaryScraper.fetch_summaries``.
    """
    from mptracker.scraper.committee_summaries import SummaryScraper

    summary_patcher = TablePatcher(
        models.CommitteeSummary,
        models.db.session,
        key_columns=['pdf_url'],
    )

    # Two HTTP sessions: the default cached one, and one using the
    # 'question-pdf' cache.
    page_session = get_cached_session()
    pdf_session = get_cached_session('question-pdf')
    scraper = SummaryScraper(page_session, pdf_session)

    summary_patcher.update(scraper.fetch_summaries(year, get_pdf_text=True))
    models.db.session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:14,代码来源:__init__.py
示例12: test_remove_extra_records_honors_filter
def test_remove_extra_records_honors_filter(db_app):
    """``remove=True`` only deletes rows that match the patcher's filter.

    Four rows are loaded.  A second update containing only "Anne" is run
    with ``remove=True`` and filter ``number == 1``.  "Bob" (number 1) is
    expected to disappear; "Claire" and "Dan" (number 2) are expected to
    remain.
    """
    from mptracker.patcher import TablePatcher

    anne = {'code': 'an', 'number': 1, 'name': "Anne"}
    bob = {'code': 'bo', 'number': 1, 'name': "Bob"}
    claire = {'code': 'cl', 'number': 2, 'name': "Claire"}
    dan = {'code': 'da', 'number': 2, 'name': "Dan"}
    all_rows = [anne, bob, claire, dan]

    filter_patcher = TablePatcher(
        Thing,
        db.session,
        key_columns=['code'],
        filter={'number': 1},
    )

    filter_patcher.update(all_rows)
    filter_patcher.update([anne], remove=True)

    remaining_names = sorted(thing.name for thing in Thing.query)
    assert remaining_names == ["Anne", "Claire", "Dan"]
开发者ID:mgax,项目名称:mptracker,代码行数:15,代码来源:test_database_patching.py
示例13: load
def load(name, include_columns=None, create=True, remove=False,
         _file=None):
    """Load JSON-lines records into the table identified by ``name``.

    Each line of ``_file`` is parsed as JSON, decoded with
    ``TableLoader.decode_dict``, optionally reduced to
    ``include_columns``, and patched into the loader's model keyed on
    ``id``.

    Args:
        name: Passed to ``TableLoader``.
        include_columns: Optional comma-separated column names.  When
            given, only those keys of each record are kept.
        create: Passed to ``TablePatcher.update``.
        remove: Passed to ``TablePatcher.update``.
        _file: Iterable of JSON lines.  Defaults to the *current*
            ``sys.stdin``.  It is resolved at call time, so a replaced
            ``sys.stdin`` is honoured.
    """
    from mptracker.patcher import TablePatcher

    if _file is None:
        _file = sys.stdin

    if include_columns:
        include_columns = set(include_columns.split(','))

        def filter_record(r):
            return {k: r[k] for k in r if k in include_columns}
    else:
        def filter_record(r):
            return r

    loader = TableLoader(name)
    patcher = TablePatcher(loader.model, db.session, key_columns=['id'])

    # Lazy generator: records are decoded as the patcher consumes them.
    records = (
        filter_record(loader.decode_dict(flask.json.loads(line)))
        for line in _file
    )
    patcher.update(records, create=create, remove=remove)

    db.session.commit()
开发者ID:alexef,项目名称:mptracker,代码行数:15,代码来源:models.py
示例14: get_member_count
def get_member_count():
    """Load member counts from the member-count CSV.

    The column with an empty header holds the ``short_name``.  Every other
    column is treated as ``year -> count``, both converted with ``int``.
    Rows are keyed on ``(short_name, year)`` and the patcher runs with
    ``remove=True``.
    """
    count_patcher = TablePatcher(
        models.MemberCount,
        models.db.session,
        key_columns=['short_name', 'year'],
    )

    with count_patcher.process(remove=True) as patch_count:
        for csv_row in get_gdrive_csv(MEMBER_COUNT_CSV_KEY):
            # Removing the unnamed column leaves only year columns.
            group_name = csv_row.pop('')
            for year_text, count_text in csv_row.items():
                entry = {
                    'short_name': group_name,
                    'year': int(year_text),
                    'count': int(count_text),
                }
                patch_count(entry)

    models.db.session.commit()
开发者ID:burnerelu,项目名称:mptracker,代码行数:18,代码来源:__init__.py
示例15: cabinet_party
def cabinet_party():
    """Load cabinet memberships of parliamentary groups from CSV.

    Each row's ``code`` is resolved to an ``MpGroup`` by ``short_name``.
    Memberships are keyed on ``(mp_group_id, interval)`` and the patcher
    runs with ``remove=True``.

    Raises:
        AssertionError: if a row's ``legislature`` is not ``'2012'``.
        KeyError: if a row's ``code`` matches no group ``short_name``.
    """
    patcher = TablePatcher(
        models.CabinetMembership,
        models.db.session,
        key_columns=['mp_group_id', 'interval'],
    )

    group_by_code = {g.short_name: g for g in models.MpGroup.query}

    with patcher.process(remove=True) as add_membership:
        for row in get_gdrive_csv(CABINET_PARTY_CSV_KEY):
            # Raised explicitly rather than via ``assert`` so the check
            # still runs under ``python -O``.
            if row['legislature'] != '2012':
                raise AssertionError(
                    "Unexpected legislature %r" % (row['legislature'],))

            group = group_by_code[row['code']]
            add_membership({
                'mp_group_id': group.id,
                'interval': parse_interval(row['start_date'],
                                           row['end_date']),
            })

    models.db.session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:19,代码来源:__init__.py
示例16: people
def people(
    year='2012',
    cache_name=None,
    throttle=None,
    no_commit=False,
    add_people=False,
):
    """Scrape mandates for ``year`` and patch mandates and people.

    Mandates are keyed on ``(year, cdep_number)`` and people on ``id``.
    Each scraped mandate is attached to the ``Person`` whose name matches
    exactly.  Non-minority mandates are also attached to the ``County``
    matching ``county_name``.

    Args:
        year: Passed to ``MandateScraper.fetch``.  Only when it equals the
            string ``'2012'`` is an ``interval`` set, running from
            ``TERM_2012_START`` to the mandate's end date or ``date.max``.
        cache_name: Passed through to ``create_session``.
        throttle: Converted to ``float`` when truthy, otherwise passed
            through unchanged to ``create_session``.
        no_commit: When true, roll the transaction back instead of
            committing it.
        add_people: When true, create missing ``Person`` rows instead of
            raising.

    Raises:
        RuntimeError: if a person is missing and ``add_people`` is false,
            or if a non-minority mandate's county cannot be matched.
        AssertionError: if a mandate's ``chamber_number`` is not 2, or if
            the person patcher reports the person row as new.
    """
    from mptracker.scraper.people import MandateScraper

    http_session = create_session(
        cache_name=cache_name,
        throttle=throttle and float(throttle),
    )
    mandate_scraper = MandateScraper(http_session)

    mandate_patcher = TablePatcher(
        models.Mandate,
        models.db.session,
        key_columns=['year', 'cdep_number'],
    )
    person_patcher = TablePatcher(
        models.Person,
        models.db.session,
        key_columns=['id'],
    )

    new_people = 0
    chamber_by_slug = {c.slug: c for c in models.Chamber.query}

    with mandate_patcher.process() as add_mandate, \
            person_patcher.process() as add_person:
        for mandate in mandate_scraper.fetch(year):
            row = mandate.as_dict([
                'year',
                'cdep_number',
                'minority',
                'college',
                'constituency',
                'picture_url',
            ])

            # Raised explicitly rather than via ``assert`` so the check
            # still runs under ``python -O``.
            if mandate.chamber_number != 2:
                raise AssertionError(
                    "Unexpected chamber number %r"
                    % (mandate.chamber_number,))
            row['chamber_id'] = chamber_by_slug['cdep'].id

            if year == '2012':
                end_date = mandate.end_date or date.max
                row['interval'] = DateRange(TERM_2012_START, end_date)

            person = (
                models.Person.query
                .filter_by(name=mandate.person_name)
                .first()
            )
            if person is None:
                if add_people:
                    person = models.Person(name=mandate.person_name)
                    models.db.session.add(person)
                    # Flush so the new person has an id before it is used
                    # below.
                    models.db.session.flush()
                    new_people += 1
                else:
                    raise RuntimeError("Can't find person named %r"
                                       % mandate.person_name)

            # The patch call must run unconditionally.  It used to sit
            # inside an ``assert``, so under ``python -O`` the person row
            # was never updated.
            person_result = add_person({
                'id': person.id,
                'first_name': mandate.person_first_name,
                'last_name': mandate.person_last_name,
            })
            if person_result.is_new:
                raise AssertionError(
                    "Person %r was unexpectedly created by the patcher"
                    % (person.id,))

            row['person_id'] = person.id

            if not mandate.minority:
                county = (
                    models.County.query
                    .filter_by(name=mandate.county_name)
                    .first()
                )
                if county is None:
                    raise RuntimeError("Can't match county name %r"
                                       % mandate.county_name)
                row['county'] = county

            add_mandate(row)

    if new_people:
        logger.info("%d new people", new_people)

    if no_commit:
        # ``Logger.warn`` is a deprecated alias of ``warning``.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()
    else:
        models.db.session.commit()
示例17: votes
def votes(
    start=None,
    days=1,
    cache_name=None,
    throttle=None,
    no_commit=False,
    autoanalyze=False,
):
    """Scrape voting sessions and individual votes for a range of days.

    Voting sessions are keyed on ``cdeppk`` and votes on
    ``(voting_session_id, mandate_id)``.  Scraping stops before today's
    date.

    Args:
        start: First day to scrape, parsed with ``parse_date``.  When
            ``None``, scraping starts the day after the latest
            ``voting_session.date`` in the database.
        days: Number of consecutive days to scrape, converted with
            ``int``.
        cache_name: Passed through to ``create_session``.
        throttle: Converted to ``float`` when truthy, otherwise passed
            through unchanged to ``create_session``.
        no_commit: When true, roll the transaction back instead of
            committing it.
        autoanalyze: When true, and the transaction was committed,
            schedule ``calculate_voting_session_loyalty`` for every newly
            created voting session.
    """
    from mptracker.scraper.votes import VoteScraper

    if start is None:
        # NOTE(review): ``scalar()`` presumably returns None when the table
        # is empty, which would make this addition fail -- confirm.
        start = models.db.session.execute(
            'select date from voting_session '
            'order by date desc limit 1').scalar() + ONE_DAY
    else:
        start = parse_date(start)

    days = int(days)

    http_session = create_session(cache_name=cache_name,
                                  throttle=throttle and float(throttle))
    vote_scraper = VoteScraper(http_session)

    voting_session_patcher = TablePatcher(
        models.VotingSession,
        models.db.session,
        key_columns=['cdeppk'],
    )
    vote_patcher = TablePatcher(
        models.Vote,
        models.db.session,
        key_columns=['voting_session_id', 'mandate_id'],
    )

    proposal_ids = {p.cdeppk_cdep: p.id for p in models.Proposal.query}
    mandate_lookup = models.MandateLookup()
    new_voting_session_list = []

    with voting_session_patcher.process() as add_voting_session:
        with vote_patcher.process() as add_vote:
            for delta in range(days):
                the_date = start + ONE_DAY * delta
                if the_date >= date.today():
                    # don't scrape today, maybe voting is not done yet!
                    break

                logger.info("Scraping votes from %s", the_date)
                for voting_session in vote_scraper.scrape_day(the_date):
                    record = model_to_dict(
                        voting_session,
                        ['cdeppk', 'subject', 'subject_html'],
                    )
                    record['date'] = the_date
                    proposal_cdeppk = voting_session.proposal_cdeppk
                    record['proposal_id'] = (
                        proposal_ids.get(proposal_cdeppk)
                        if proposal_cdeppk else None)
                    # ``in`` already yields a bool.
                    record['final'] = (
                        "vot final" in record['subject'].lower())

                    vs = add_voting_session(record).row
                    if vs.id is None:
                        # A row without an id is treated as newly created.
                        # Flush to obtain its id.
                        models.db.session.flush()
                        new_voting_session_list.append(vs.id)

                    for vote in voting_session.votes:
                        record = model_to_dict(vote, ['choice'])
                        record['voting_session_id'] = vs.id
                        mandate = mandate_lookup.find(
                            vote.mandate_name,
                            vote.mandate_year,
                            vote.mandate_number,
                        )
                        record['mandate_id'] = mandate.id
                        add_vote(record)

    if no_commit:
        # ``Logger.warn`` is a deprecated alias of ``warning``.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()
    else:
        models.db.session.commit()

        # Analysis jobs are only scheduled for sessions that were actually
        # committed.
        if autoanalyze:
            from mptracker.votes import calculate_voting_session_loyalty
            logger.info("Scheduling %d jobs", len(new_voting_session_list))
            for voting_session_id in new_voting_session_list:
                calculate_voting_session_loyalty.delay(voting_session_id)
开发者ID:Cristianf,项目名称:mptracker,代码行数:90,代码来源:__init__.py
示例18: controversy
def controversy():
    """Sync controversies and their voting-session links from CSV.

    Steps:

    1. Collect the voting sessions that currently have a
       ``controversy_id``.
    2. Group the CSV lines by ``slug``, resolving each line's ``link``
       (its ``idv`` query argument) to a ``VotingSession`` by ``cdeppk``.
    3. Patch ``Controversy`` rows keyed on ``slug``, with ``remove=True``.
    4. Point each listed voting session at its controversy, and clear
       ``controversy_id`` on sessions that were linked before but are no
       longer listed.
    """
    # ``!= None`` is kept verbatim: it is an expression on a model column,
    # not a plain Python comparison.
    previously_linked = set(
        models.VotingSession.query
        .filter(models.VotingSession.controversy_id != None)  # noqa: E711
        .all()
    )

    entries_by_slug = {}
    for csv_line in get_gdrive_csv(CONTROVERSY_CSV_KEY):
        cdeppk = url_args(csv_line['link']).get('idv', type=int)
        slug = csv_line['slug']

        if slug not in entries_by_slug:
            entries_by_slug[slug] = {
                'data': {
                    'slug': slug,
                    'title': csv_line['title'],
                },
                'voting_session_rows': [],
            }

        session_row = (
            models.VotingSession.query
            .filter_by(cdeppk=cdeppk)
            .first()
        )
        entries_by_slug[slug]['voting_session_rows'].append(session_row)

    controversy_patcher = TablePatcher(
        models.Controversy,
        models.db.session,
        key_columns=['slug'],
    )
    with controversy_patcher.process(remove=True) as patch_controversy:
        for entry in entries_by_slug.values():
            entry['row'] = patch_controversy(entry['data']).row

    # Flush before reading ``entry['row'].id`` below.
    models.db.session.flush()

    voting_session_patcher = TablePatcher(
        models.VotingSession,
        models.db.session,
        key_columns=['id'],
    )
    now_linked = set()
    with voting_session_patcher.process() as patch_voting_session:
        for entry in entries_by_slug.values():
            controversy_id = entry['row'].id
            for session_row in entry['voting_session_rows']:
                patch_voting_session(
                    {
                        'id': session_row.id,
                        'controversy_id': controversy_id,
                    },
                    create=False,
                )
                now_linked.add(session_row)

        # Sessions that used to be linked but are no longer in the CSV.
        for stale_row in previously_linked - now_linked:
            patch_voting_session({
                'id': stale_row.id,
                'controversy_id': None,
            })

    models.db.session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:66,代码来源:__init__.py
示例19: transcripts
def transcripts(start=None, n_sessions=1, cache_name=None, throttle=None):
    """Scrape transcript sessions and patch their paragraphs.

    For each fetched session the ``TranscriptChapter`` rows are created or
    updated by ``serial``.  Every paragraph whose ``mandate_chamber`` is 2
    and whose speaker can be resolved is patched into ``Transcript``,
    keyed on ``serial``.  Paragraphs whose mandate lookup fails are logged
    and skipped.

    Args:
        start: First session number to fetch, converted with ``int``.
            When ``None``, it is one more than the number before the first
            ``/`` of the highest ``transcript_chapter.serial`` in the
            database.
        n_sessions: Number of consecutive session numbers to try,
            converted with ``int``.
        cache_name: Passed through to ``create_session``.
        throttle: Converted to ``float`` when truthy, otherwise passed
            through unchanged to ``create_session``.
    """
    from mptracker.scraper.transcripts import TranscriptScraper

    if start is None:
        # NOTE(review): ``scalar()`` presumably returns None when the table
        # is empty, which would make ``.split`` fail -- confirm.
        max_serial = models.db.session.execute(
            'select serial from transcript_chapter '
            'order by serial desc limit 1').scalar()
        start = int(max_serial.split('/')[0]) + 1

    first_cdeppk = int(start)
    n_sessions = int(n_sessions)

    transcript_scraper = TranscriptScraper(
        session=create_session(cache_name=cache_name,
                               throttle=throttle and float(throttle)))

    mandate_lookup = models.MandateLookup()

    transcript_patcher = TablePatcher(models.Transcript,
                                      models.db.session,
                                      key_columns=['serial'])

    with transcript_patcher.process() as add:
        # Visits first_cdeppk, first_cdeppk + 1, ... for n_sessions
        # sessions.  A non-positive n_sessions visits none.
        for cdeppk in range(first_cdeppk, first_cdeppk + n_sessions):
            logger.info("Fetching session %s", cdeppk)
            session_data = transcript_scraper.fetch_session(cdeppk)
            if session_data is None:
                logger.info("No content")
                continue

            for chapter in session_data.chapters:
                chapter_row = (
                    models.TranscriptChapter.query
                    .filter_by(serial=chapter.serial)
                    .first()
                )
                if chapter_row is None:
                    chapter_row = models.TranscriptChapter(
                        serial=chapter.serial)
                    models.db.session.add(chapter_row)
                    # Flush so the new chapter has an id for its
                    # paragraphs.
                    models.db.session.flush()

                chapter_row.date = session_data.date
                chapter_row.headline = chapter.headline

                for paragraph in chapter.paragraphs:
                    if paragraph['mandate_chamber'] != 2:
                        continue

                    try:
                        mandate = mandate_lookup.find(
                            paragraph['speaker_name'],
                            paragraph['mandate_year'],
                            paragraph['mandate_number'])
                    except models.LookupError as e:
                        # ``Logger.warn`` is a deprecated alias of
                        # ``warning``.
                        logger.warning("at %s %s", paragraph['serial'], e)
                        continue

                    add({
                        'chapter_id': chapter_row.id,
                        'text': paragraph['text'],
                        'serial': paragraph['serial'],
                        'mandate_id': mandate.id,
                    })

    models.db.session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:65,代码来源:__init__.py
示例20: import_person_xls
def import_person_xls(xls_path):
    """Import mandates, people, committees and groups from a spreadsheet.

    The records from ``read_person_xls`` are processed in stages:

    1. Mandates are patched, keyed on ``(year, cdep_number)``.  The
       person payloads and the committee and group memberships are
       collected along the way.
    2. People are patched, keyed on ``id``.
    3. Committees are patched, keyed on ``name``, and then their
       memberships, keyed on ``(mandate_id, mp_committee_id)``.
    4. Groups are patched, keyed on ``short_name``, and then their
       memberships, keyed on ``(mandate_id, mp_group_id)``.

    Args:
        xls_path: Passed to ``read_person_xls``.
    """
    from mptracker.scraper.person_xls import read_person_xls

    db_session = models.db.session
    lookup = models.MandateLookup()

    people_payloads = []
    committee_ids = {}           # committee name -> id (filled in stage 3)
    committee_links = []         # (mandate id, committee name, role)
    group_ids = {}               # group short_name -> id (filled in stage 4)
    group_links = []             # (mandate id, group short_name, role)

    # Stage 1: mandates.
    mandate_patcher = TablePatcher(
        models.Mandate, db_session, key_columns=['year', 'cdep_number'])
    with mandate_patcher.process() as patch_mandate:
        for record in read_person_xls(xls_path):
            person_name = record.pop('name')
            found = lookup.find(
                person_name, record['year'], record['cdep_number'])

            payload = record.pop('person_data')
            payload['id'] = found.person_id
            people_payloads.append(payload)

            # Strip the non-mandate parts before patching the mandate.
            committee_entries = record.pop('committees')
            group_entry = record.pop('mp_group')
            mandate_row = patch_mandate(record).row

            for entry in committee_entries:
                committee_ids[entry['name']] = None
                committee_links.append(
                    (mandate_row.id, entry['name'], entry['role']))

            group_ids[group_entry['short_name']] = None
            group_links.append(
                (mandate_row.id, group_entry['short_name'],
                 group_entry['role']))

    # Stage 2: people.
    person_patcher = TablePatcher(
        models.Person, db_session, key_columns=['id'])
    with person_patcher.process() as patch_person:
        for payload in people_payloads:
            patch_person(payload)

    # Stage 3: committees, then committee memberships.
    committee_patcher = TablePatcher(
        models.MpCommittee, db_session, key_columns=['name'])
    with committee_patcher.process() as patch_committee:
        for committee_name in list(committee_ids):
            committee_row = patch_committee({'name': committee_name}).row
            committee_ids[committee_name] = committee_row.id

    committee_membership_patcher = TablePatcher(
        models.MpCommitteeMembership,
        db_session,
        key_columns=['mandate_id', 'mp_committee_id'],
    )
    with committee_membership_patcher.process() as patch_membership:
        for mandate_id, committee_name, role in committee_links:
            patch_membership({
                'mandate_id': mandate_id,
                'mp_committee_id': committee_ids[committee_name],
                'role': role,
            })

    # Stage 4: groups, then group memberships.
    mp_group_patcher = TablePatcher(
        models.MpGroup, db_session, key_columns=['short_name'])
    with mp_group_patcher.process() as patch_group:
        for short_name in list(group_ids):
            group_row = patch_group({'short_name': short_name}).row
            group_ids[short_name] = group_row.id

    mp_group_membership_patcher = TablePatcher(
        models.MpGroupMembership,
        db_session,
        key_columns=['mandate_id', 'mp_group_id'],
    )
    with mp_group_membership_patcher.process() as patch_membership:
        for mandate_id, short_name, role in group_links:
            patch_membership({
                'mandate_id': mandate_id,
                'mp_group_id': group_ids[short_name],
                'role': role,
            })

    db_session.commit()
开发者ID:Cristianf,项目名称:mptracker,代码行数:76,代码来源:__init__.py
注:本文中的mptracker.patcher.TablePatcher类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论