• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python inspector.year_range函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 utils.inspector.year_range 函数的典型用法代码示例。如果您正苦于以下问题:Python year_range 函数的具体用法?Python year_range 怎么用?Python year_range 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了year_range函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: run

def run(options):
  """Scrape the audit, other, and semiannual report listings and save each report.

  Args:
    options: Scraper options; forwarded to ``inspector.year_range`` together
      with the module-level ``archive`` value to obtain the years to cover.
  """
  year_range = inspector.year_range(options, archive)

  # Pull the audit reports.
  # Every year before 2006 is mapped onto the 2006 page (the oldest one), so
  # remember which pages were already handled and fetch each of them only once
  # instead of re-downloading the 2006 page for every earlier year.
  fetched_years = set()
  for year in year_range:
    year = max(year, 2006)  # This is the oldest year for these reports
    if year in fetched_years:
      continue
    fetched_years.add(year)

    url = AUDIT_REPORTS_BASE_URL.format(year)
    doc = beautifulsoup_from_url(url)
    # Odd and even table rows carry different CSS classes; collect both.
    results = []
    results.extend(doc.select("tr.ms-rteTableOddRow-default"))
    results.extend(doc.select("tr.ms-rteTableEvenRow-default"))
    for result in results:
      report = audit_report_from(result, url, year_range)
      if report:
        inspector.save_report(report)

  # Pull the other report types, one listing page per type.
  for report_type, url in OTHER_URLS.items():
    doc = beautifulsoup_from_url(url)
    results = doc.select("#ctl00_PlaceHolderMain_ctl05_ctl01__ControlWrapper_RichHtmlField > p > a")
    for result in results:
      report = report_from(result, url, report_type, year_range)
      if report:
        inspector.save_report(report)

  # Pull the semiannual reports.
  doc = beautifulsoup_from_url(SEMIANNUAL_REPORTS_URL)
  results = doc.select("#ctl00_PlaceHolderMain_ctl05_ctl01__ControlWrapper_RichHtmlField > p > a")
  for result in results:
    report = semiannual_report_from(result, SEMIANNUAL_REPORTS_URL, year_range)
    if report:
      inspector.save_report(report)
开发者ID:slobdell,项目名称:inspectors-general,代码行数:31,代码来源:treasury.py


示例2: run

def run(options):
  """Fetch the report table of each component and save the matching reports.

  Options read here: ``component`` (restrict to one component),
  ``report_id`` (keep only that report) and ``limit`` (maximum number of
  reports kept per component; 0 means no limit).

  Raises:
    inspector.NoReportsFoundError: if a component page yields no body rows.
  """
  year_range = inspector.year_range(options, archive)

  requested_component = options.get('component')
  components = [requested_component] if requested_component else list(COMPONENTS.keys())

  report_id = options.get('report_id')
  limit = int(options.get('limit', 0))

  # Keyed by (report_id, title) so a report listed under several components
  # is saved once, with the agencies merged.
  all_audit_reports = {}

  for component in components:
    logging.info("## Fetching reports for component %s" % component)
    url = url_for(options, component)
    doc = BeautifulSoup(utils.download(url))

    # The HTML is inconsistent (no reliable thead/tbody), so body rows are
    # recognised by the absence of an 'align' attribute.
    rows = [
      row
      for row in doc.select("table.contentpaneopen table[border=1] tr")
      if row.get('align') is None
    ]
    if not rows:
      raise inspector.NoReportsFoundError("DHS (%s)" % component)

    count = 0
    for row in rows:
      report = report_from(row, component, url)
      if not report:
        continue
      if report_id and report_id != report['report_id']:
        continue
      if inspector.year_from(report) not in year_range:
        continue

      key = (report["report_id"], report["title"])
      if key not in all_audit_reports:
        all_audit_reports[key] = report
      else:
        merged = all_audit_reports[key]
        merged["agency"] = merged["agency"] + ", " + report["agency"]
        merged["agency_name"] = merged["agency_name"] + ", " + report["agency_name"]

      count += 1
      if limit and count >= limit:
        break

    logging.info("## Fetched %i reports for component %s\n\n" % (count, component))

  for report in all_audit_reports.values():
    inspector.save_report(report)
开发者ID:Cloudxtreme,项目名称:inspectors-general,代码行数:60,代码来源:dhs.py


示例3: run

def run(options):
  """Save the semiannual, special, and per-year audit reports.

  Semiannual and special reports are read from the ``REPORTS_URL`` page;
  audit reports are read from one page per year (2001 onwards).
  """
  year_range = inspector.year_range(options, archive)

  landing_page = BeautifulSoup(utils.download(REPORTS_URL))

  # Semiannual reports: one <option> per report in the first matching <select>.
  semiannual_select = landing_page.select("#AnnualManagementReports select")[0]
  for option in semiannual_select.select("option"):
    report = semiannual_report_from(option, year_range)
    if report:
      inspector.save_report(report)

  # Special reports: rows of the table identified by its border colour.
  special_report_table = landing_page.find("table", attrs={"bordercolor": "#808080"})
  for row in special_report_table.select("tr")[1:]:  # first row is the header
    report = report_from(row, REPORTS_URL, report_type='other', year_range=year_range)
    if report:
      inspector.save_report(report)

  # Audit reports: one page per fiscal year.
  for year in year_range:
    if year >= 2001:  # 2001 is the oldest fiscal year page available
      year_url = AUDIT_REPORTS_URL.format(year=year)
      year_page = BeautifulSoup(utils.download(year_url))
      for row in year_page.select("#main table tr")[1:]:  # first row is the header
        report = report_from(row, year_url, report_type='audit', year_range=year_range)
        if report:
          inspector.save_report(report)
开发者ID:slobdell,项目名称:inspectors-general,代码行数:35,代码来源:rrb.py


示例4: run

def run(options):
  """Process the per-year audit/inspection listings, then testimony and semiannual reports.

  From October onwards the year after the last requested one is appended to
  the year range, because those months belong to the next fiscal year.
  """
  year_range = inspector.year_range(options, archive)
  current_month = datetime.datetime.now().month
  if current_month >= 10:
    # October, November, and December fall into the next fiscal year
    year_range.append(max(year_range) + 1)

  # Audit and inspection/evaluation listings, handled identically per year.
  js_listings = (
    (audit_report_url, "auditreports", 'audit'),
    (inspection_report_url, "iereports", 'inspection'),
  )
  for year in year_range:
    for build_url, listing_name, report_type in js_listings:
      url = build_url(year)
      if url:
        parse_result_from_js_url(url, listing_name, year, year_range, report_type=report_type)

  # Congressional testimony: items of the first disc-style list on the page.
  testimony_page = utils.beautifulsoup_from_url(CONGRESSIONAL_TESTIMONY_REPORTS_URL)
  for item in testimony_page.findAll("ul", type='disc')[0].select("li"):
    report = congressional_testimony_report_from(item, year_range)
    if report:
      inspector.save_report(report)

  # Semiannual reports: same page layout as the testimony page.
  semiannual_page = utils.beautifulsoup_from_url(SEMIANNUAL_REPORTS_URL)
  for item in semiannual_page.findAll("ul", type='disc')[0].select("li"):
    report = semiannual_report_from(item, year_range)
    if report:
      inspector.save_report(report)
开发者ID:unitedstates,项目名称:inspectors-general,代码行数:31,代码来源:tigta.py


示例5: run

def run(options):
    """Save the audit (per year, 2002 onwards), FOIA, and semiannual reports."""
    year_range = inspector.year_range(options, archive)

    # Audit reports: one listing page per year.
    for year in year_range:
        if year >= 2002:  # 2002 is the oldest page for audit reports
            audit_page = BeautifulSoup(utils.download(AUDIT_REPORTS_URL.format(year=year)))
            for row in audit_page.select("div.content table tr")[1:]:  # skip the header row
                report = report_from(row, report_type="audit", year_range=year_range)
                if report:
                    inspector.save_report(report)

    # FOIA reports: same table layout, filed under report type "other".
    foia_page = BeautifulSoup(utils.download(FOIA_REPORTS_URL))
    for row in foia_page.select("div.content table tr")[1:]:  # skip the header row
        report = report_from(row, report_type="other", year_range=year_range)
        if report:
            inspector.save_report(report)

    # Semiannual reports: every link inside the content area.
    semiannual_page = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
    for link in semiannual_page.select("div.content a"):
        report = semiannual_report_from(link, year_range)
        if report:
            inspector.save_report(report)
开发者ID:slobdell,项目名称:inspectors-general,代码行数:35,代码来源:ncua.py


示例6: run

def run(options):
  """Save the audit reports of each year (2005 onwards) and the semiannual reports.

  Raises:
    inspector.NoReportsFoundError: if a yearly audit page or the semiannual
      page yields no result elements.
  """
  year_range = inspector.year_range(options, archive)

  # Audit reports, one page per year.
  for year in year_range:
    if year < 2005:  # This is the earliest audits go back
      continue
    url = AUDIT_REPORTS_URL.format(year=year)
    audit_entries = BeautifulSoup(utils.download(url)).select("div.content")
    if not audit_entries:
      raise inspector.NoReportsFoundError("Tennessee Valley Authority (%d)" % year)
    for entry in audit_entries:
      report = audit_report_from(entry, url, year_range)
      if report:
        inspector.save_report(report)

  # Semiannual reports, listed as <report> elements.
  semiannual_entries = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL)).select("report")
  if not semiannual_entries:
    raise inspector.NoReportsFoundError("Tennessee Valley Authority (semiannual reports)")
  for entry in semiannual_entries:
    report = semiannual_report_from(entry, year_range)
    if report:
      inspector.save_report(report)
开发者ID:Cloudxtreme,项目名称:inspectors-general,代码行数:26,代码来源:tva.py


示例7: run

def run(options):
  """Walk the paginated listing and save every report whose year is in range.

  The ``pages`` option caps how many pages are requested (default
  ``ALL_PAGES``); iteration also stops once the last page reported by the
  site has been passed.
  """
  year_range = inspector.year_range(options)
  page_limit = int(options.get('pages', ALL_PAGES))

  last_page = None  # unknown until the first page has been parsed
  for page in range(1, page_limit + 1):
    if last_page and page > last_page:
      print("End of pages!")
      break

    print("## Downloading page %i" % page)
    doc = BeautifulSoup(utils.download(url_for(options, page)))
    last_page = last_page_for(doc)

    for row in doc.select(".views-row"):
      report = report_from(row)

      # inefficient enforcement of --year arg, USPS doesn't support it server-side
      # TODO: change to published_on.year once it's a datetime
      if inspector.year_from(report) in year_range:
        inspector.save_report(report)
      else:
        print("[%s] Skipping report, not in requested range." % report['report_id'])
开发者ID:ericalthatcher,项目名称:inspectors-general,代码行数:28,代码来源:usps.py


示例8: run

def run(options):
    """Page through the report grid and save every report returned by ``report_from``.

    The number of pages is read from the pagination text ("Page 1 of N") of
    the first page; each page is then requested by posting the grid view's
    paging event.
    """
    year_range = inspector.year_range(options, archive)

    # Find the number of pages to iterate
    doc = BeautifulSoup(utils.download(REPORTS_URL))
    page_count_text = doc.select("div.AspNet-GridView-Pagination")[0].text
    # Raw string: "\d" in a plain string literal is an invalid escape sequence.
    page_count = int(re.search(r"Page 1 of (\d+)", page_count_text).group(1))

    # Iterate over those pages
    for page in range(1, page_count + 1):
        response = utils.scraper.post(
            REPORTS_URL,
            data={
                "__EVENTTARGET": "ctl00$ctl00$MainContent$NavTreeSubContent$sv$GridViewSummary",
                "__EVENTARGUMENT": "Page${page_number}".format(page_number=page),
            },
            cookies=COOKIES,
        )
        doc = BeautifulSoup(response.content)
        results = doc.select("div.AspNet-GridView table tr")
        if not results:
            # An empty page means there is nothing further to read.
            break
        for index, result in enumerate(results):
            if not index:
                # Skip the header row
                continue
            report = report_from(result, year_range)
            if report:
                inspector.save_report(report)
开发者ID:slobdell,项目名称:inspectors-general,代码行数:29,代码来源:prc.py


示例9: run

def run(options):
  """Process the per-year audit/inspection listings, then testimony and semiannual reports."""
  year_range = inspector.year_range(options)

  # Audit and inspection/evaluation listings, handled identically per year.
  js_listings = (
    (audit_report_url, "auditreports"),
    (inspection_report_url, "iereports"),
  )
  for year in year_range:
    for build_url, listing_name in js_listings:
      url = build_url(year)
      if url:
        parse_result_from_js_url(url, listing_name, year, year_range)

  # Congressional testimony: items of the first disc-style list on the page.
  testimony_page = BeautifulSoup(utils.download(CONGRESSIONAL_TESTIMONY_REPORTS_URL))
  for item in testimony_page.findAll("ul", type='disc')[0].select("li"):
    report = congressional_testimony_report_from(item, year_range)
    if report:
      inspector.save_report(report)

  # Semiannual reports: same page layout as the testimony page.
  semiannual_page = BeautifulSoup(utils.download(SEMIANNUAL_REPORTS_URL))
  for item in semiannual_page.findAll("ul", type='disc')[0].select("li"):
    report = semiannual_report_from(item, year_range)
    if report:
      inspector.save_report(report)
开发者ID:BunsenMcDubbs,项目名称:inspectors-general,代码行数:27,代码来源:tigta.py


示例10: run

def run(options):
  """Save the audit, other, semiannual, and performance/strategic plan reports.

  Raises:
    inspector.NoReportsFoundError: if no audit page produced any table rows,
      or if one of the other listing pages matched no elements.
  """
  year_range = inspector.year_range(options, archive)

  # Audit reports: one page per year; 2018 is served from a separate URL.
  found_audit_rows = False
  for year in year_range:
    if year < 2002:  # The oldest page for audit reports
      continue
    if year == 2018:
      audit_url = LATEST_AUDIT_REPORTS_URL
    else:
      audit_url = AUDIT_REPORTS_URL.format(year=year)
    doc = utils.beautifulsoup_from_url(audit_url)
    if doc is None:
      # Next year's audit page may not be published yet
      continue

    rows = doc.select("div.mainCenter table tr")
    if rows:
      found_audit_rows = True
    for row in rows[1:]:  # first row is the header
      report = report_from(row, report_type='audit', year_range=year_range)
      if report:
        inspector.save_report(report)

  if not found_audit_rows:
    raise inspector.NoReportsFoundError("NCUA (audit reports)")

  def save_listing(listing_url, selector, label, parse):
    # Shared handling for the single-page listings below.
    elements = utils.beautifulsoup_from_url(listing_url).select(selector)
    if not elements:
      raise inspector.NoReportsFoundError(label)
    for element in elements:
      report = parse(element)
      if report:
        inspector.save_report(report)

  # Other reports
  save_listing(OTHER_REPORTS_URL, "div.mainCenter p", "NCUA (other)",
               lambda element: other_report_from(element, year_range=year_range))

  # Semiannual reports
  save_listing(SEMIANNUAL_REPORTS_URL, "div#mainColumns div.mainCenter a",
               "NCUA (semiannual reports)",
               lambda element: semiannual_report_from(element, year_range))

  # Performance and strategic plans
  save_listing(PLANS_URL, "div.mainCenter p", "NCUA (performance/strategic plans)",
               lambda element: plan_from(element, year_range))
开发者ID:unitedstates,项目名称:inspectors-general,代码行数:60,代码来源:ncua.py


示例11: run

def run(options):
  """Save the reports listed in each per-year ``<section>`` of the listing page.

  Sections whose ``title`` attribute is missing or is not an integer year
  are skipped, as are years outside the requested range. The ``report_id``
  option restricts saving to that single report.
  """
  year_range = inspector.year_range(options)
  only_id = options.get('report_id')

  print("## Downloading reports from %i to %i" % (year_range[0], year_range[-1]))

  url = url_for()
  body = utils.download(url)

  doc = BeautifulSoup(body)
  results = doc.select("section")

  for result in results:
    try:
      year = int(result.get("title"))
    except (TypeError, ValueError):
      # TypeError: the section has no title attribute (get() returned None).
      # ValueError: the title is not a number, so this is not a year section.
      continue

    # check that the fetched year is in the range
    if year not in year_range:
      continue
    print("## Downloading year %i " % year)

    # gets each table entry and generates a report from it
    listings = result.div.table.tbody.contents
    for item in listings:
      # contents also holds non-tag nodes (e.g. whitespace strings); skip them
      if not isinstance(item, bs4.element.Tag):
        continue
      report = report_from(item)

      # can limit it to just one report, for debugging convenience
      if only_id and only_id != report['report_id']:
        continue

      inspector.save_report(report)
开发者ID:spulec,项目名称:inspectors-general,代码行数:34,代码来源:opm.py


示例12: scrape_restricted_reports

def scrape_restricted_reports(options):
  """Restricted Products.

  A single HTML page lists unreleased reports since 2014, with no links.
  Each listing is passed to ``process_restricted_report`` and saved when a
  report comes back."""

  # These reports are unreleased -- we could make this the text?
  """The following products have been determined to contain either
classified information or controlled unclassified information by the audited
agencies and cannot be publicly released.

Members of Congress or congressional staff who wish to obtain one or more of
these products should call or e-mail the Congressional Relations Office.
All others who wish to obtain one or more of these products should follow the
instructions found on Requesting Restricted Products."""

  REPORTS_URL = 'http://www.gao.gov/restricted/restricted_reports'
  archive = 2014

  year_range = inspector.year_range(options, archive)
  listings = utils.beautifulsoup_from_url(REPORTS_URL).select("div.listing")
  for listing in listings:
    report = process_restricted_report(listing, year_range, REPORTS_URL)
    if not report:
      continue
    inspector.save_report(report)
开发者ID:unitedstates,项目名称:inspectors-general,代码行数:25,代码来源:gaoreports.py


示例13: run

def run(options):
  """Save reports from the RSS feed and from the recent, archive, and other listings."""
  year_range = inspector.year_range(options, archive)

  # RSS feed: one <item> per report.
  feed = BeautifulSoup(utils.download(RSS_URL))
  for item in feed.select("item"):
    report = rss_report_from(item, year_range)
    if report:
      inspector.save_report(report)

  def save_linked_reports(listing_url, link_selector):
    # The three HTML listings differ only in URL and link selector.
    listing = BeautifulSoup(utils.download(listing_url))
    for link in listing.select(link_selector):
      report = report_from(link, year_range)
      if report:
        inspector.save_report(report)

  # Recent audit reports
  save_linked_reports(RECENT_AUDITS_URL, "div.block > a")

  # Archived audit reports
  save_linked_reports(AUDIT_ARCHIVE_URL, "div.block a")

  # Other reports
  save_linked_reports(OTHER_REPORTS_URl, "div.block > a")
开发者ID:slobdell,项目名称:inspectors-general,代码行数:34,代码来源:smithsonian.py


示例14: scrape_reports

def scrape_reports(options):
  """Pull reports from "Reports and Testimonies - Browse by date" web page.

  For each year the result pages are requested 50 rows at a time until the
  page no longer ends with a "Next" pager link.
  """

  # Placeholders, in order: begin year, end year, row offset.
  REPORTS_URL = (
    'http://www.gao.gov/browse/date/custom?adv_begin_date=01/01/'
    '%s&adv_end_date=12/31/%s&rows=50&o=%s'
  )
  archive = 1970
  # Amazingly, reports go back to 1940, though those are unlikely to be
  # legible enough to OCR. Also very cool, even 1950s-era reports seem to have
  # a highlightable embedded text layer in them. Of course, it was the
  # General Accounting Office back then and less oversighty.

  year_range = inspector.year_range(options, archive)
  for year in year_range:
    offset = 0
    while True:
      doc = utils.beautifulsoup_from_url(REPORTS_URL % (year, year, offset))
      for listing in doc.select("div.listing"):
        report = process_report(listing, year_range)
        if report:
          inspector.save_report(report)

      pager_links = doc.select("a.non-current_page")
      has_next = len(pager_links) and pager_links[-1].text.startswith('Next')
      if not has_next:
        break
      offset += 50
开发者ID:unitedstates,项目名称:inspectors-general,代码行数:28,代码来源:gaoreports.py


示例15: run

def run(options):
  """Save every report row of the paginated listing for each year in range.

  Paging for a year stops when the pager shows no link for the following
  page or when the ``pages`` option (default 1) has been reached.
  """
  year_range = inspector.year_range(options)
  max_pages = int(options.get('pages', 1))

  for year in year_range:
    page = 1
    while True:
      doc = BeautifulSoup(utils.download(url_for(options, page, year)))

      next_page = page + 1
      next_label = str(next_page)
      has_next_link = any(
        link.text == next_label
        for link in doc.select("li.pager-item a.active")
      )
      done = (not has_next_link) or (next_page > max_pages)

      for row in doc.select("table.views-table > tbody > tr"):
        inspector.save_report(report_from(row))

      page = next_page
      if done:
        break
      print('Moving to next page (%d)' % page)
开发者ID:spulec,项目名称:inspectors-general,代码行数:32,代码来源:amtrak.py


示例16: run

def run(options):
  """Page through the audit listing per year, then through each other report listing.

  Raises:
    inspector.NoReportsFoundError: if no audit page reported any reports, or
      if the first page of another report type reported none.
  """
  year_range = inspector.year_range(options, archive)

  # Audit reports: per year, keep paging until a page reports nothing.
  found_any_audit = False
  for year in year_range:
    for page in range(0, ALL_PAGES):
      if not reports_from_page(AUDIT_REPORTS_URL, page, 'audit', year_range, year):
        break
      found_any_audit = True
  if not found_any_audit:
    raise inspector.NoReportsFoundError("Social Security Administration (audit)")

  # Other reports: an empty first page is an error, a later one ends paging.
  for report_type, report_format in OTHER_REPORT_URLS.items():
    for page in range(0, ALL_PAGES):
      if reports_from_page(report_format, page, report_type, year_range):
        continue
      if page == 0:
        raise inspector.NoReportsFoundError("Social Security Administration (%s)" % report_type)
      break
开发者ID:unitedstates,项目名称:inspectors-general,代码行数:25,代码来源:ssa.py


示例17: run

def run(options):
  """Save the paginated report listings and the unpaginated semiannual listing.

  Raises:
    Exception: if the first audit page lacks the report-number element.
    inspector.NoReportsFoundError: if the first page of a paginated listing,
      or the semiannual page, contains no rows.
  """
  year_range = inspector.year_range(options, archive)

  # Paginated listings: page until a page comes back without rows.
  for report_type, report_url_format in PAGINATED_REPORT_FORMATS:
    for page in range(0, 999):
      url = report_url_format.format(page=page)
      doc = utils.beautifulsoup_from_url(url)

      # Sanity check on the markup, done once per run on the first audit page.
      is_first_audit_page = report_type == "audit" and page == 0
      if is_first_audit_page and not doc.select("div.views-field-field-auditreport-doc-1"):
        raise Exception("Report number CSS class has changed")

      rows = doc.select("li.views-row")
      if not rows:
        if page == 0:
          raise inspector.NoReportsFoundError("USAID (%s)" % report_type)
        break

      for row in rows:
        report = report_from(row, url, report_type, year_range)
        if report:
          inspector.save_report(report)

  # Semiannual reports (no pagination)
  semiannual_rows = utils.beautifulsoup_from_url(SEMIANNUAL_REPORTS_URL).select("li.views-row")
  if not semiannual_rows:
    raise inspector.NoReportsFoundError("USAID (semiannual reports)")
  for row in semiannual_rows:
    report = semiannual_report_from(row, year_range)
    if report:
      inspector.save_report(report)
开发者ID:unitedstates,项目名称:inspectors-general,代码行数:31,代码来源:usaid.py


示例18: run

def run(options):
  """Request the report table page by page over the AJAX endpoint and save the reports.

  The ``pages`` option caps the number of pages (default ``ALL_PAGES``);
  paging also stops at the first response that contains no table rows.
  """
  year_range = inspector.year_range(options, archive)

  # Suggested flow, for an IG which paginates results.
  page_limit = int(options.get('pages', ALL_PAGES))
  for page in range(1, page_limit + 1):
    data = {
      'view_name': 'oig_nodes',
      'view_display_id': 'block_search_oig_reports',
    }
    # Only add page= if page > 0
    # NOTE(review): the range starts at 1, so this condition always holds and
    # a page-0 request is never made -- confirm that is intended.
    if page:
      data['page'] = page

    response = utils.scraper.post(
      REPORTS_AJAX_URL,
      data=data,
      headers={"Content-Type": "application/x-www-form-urlencoded"},
    )
    # The HTML fragment sits in the 'data' field of the second JSON element.
    page_html = response.json()[1]['data']
    rows = BeautifulSoup(page_html).select("tr")
    if not rows:
      break

    for row in rows[1:]:  # first row is the header
      report = report_from(row, year_range)
      if report:
        inspector.save_report(report)
开发者ID:slobdell,项目名称:inspectors-general,代码行数:33,代码来源:sba.py


示例19: run

def run(options):
  """Save the paginated audit listings per year and the semiannual reports.

  All years before 1998 are handled by a single pass: only the first such
  year in the range is processed, later ones are skipped.

  Raises:
    inspector.NoReportsFoundError: if the first page of a year, or the
      semiannual page, contains no entries.
  """
  year_range = inspector.year_range(options, archive)

  pre_1998_handled = False

  # Audit reports
  for year in year_range:
    if year < 1998:
      if pre_1998_handled:
        continue
      pre_1998_handled = True

    for page_number in range(0, 10000):
      year_url = url_for(year, page_number)
      entries = beautifulsoup_from_url(year_url).select("ol li")
      if not entries:
        if page_number == 0:
          raise inspector.NoReportsFoundError("Department of Labor (%s)" % year_url)
        break
      for entry in entries:
        report = report_from(entry, year_url)
        if report:
          inspector.save_report(report)

  # Semiannual reports: the first link of each paragraph.
  semiannual_links = beautifulsoup_from_url(SEMIANNUAL_REPORTS_URL).select("p > a:nth-of-type(1)")
  if not semiannual_links:
    raise inspector.NoReportsFoundError("Department of Labor (semiannal reports)")
  for link in semiannual_links:
    report = semiannual_report_from(link, year_range)
    if report:
      inspector.save_report(report)
开发者ID:Cloudxtreme,项目名称:inspectors-general,代码行数:35,代码来源:labor.py


示例20: run

def run(options):
  """Save the paginated audit reports and the semiannual reports.

  Raises:
    inspector.NoReportsFoundError: if the first audit page, or the
      semiannual page, contains no entries.
  """
  year_range = inspector.year_range(options, archive)

  # Audit reports: page until a page comes back without entries.
  for page in range(1, 1000):
    page_url = "{}?RS={}".format(REPORTS_URL, page)
    entries = beautifulsoup_from_url(page_url).select("div.leadin")
    if not entries:
      if page == 1:
        raise inspector.NoReportsFoundError("VA (audit reports)")
      break
    for entry in entries:
      report = report_from(entry, year_range)
      if report:
        inspector.save_report(report)

  # Semiannual reports
  semiannual_entries = beautifulsoup_from_url(SEMIANNUAL_REPORTS_URL).select("div.leadin")
  if not semiannual_entries:
    raise inspector.NoReportsFoundError("VA (semiannual reports)")
  for entry in semiannual_entries:
    report = semiannual_report_from(entry, year_range)
    if report:
      inspector.save_report(report)
开发者ID:Cloudxtreme,项目名称:inspectors-general,代码行数:26,代码来源:va.py



注:本文中的utils.inspector.year_range函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python jmx.JMXFiles类代码示例发布时间:2022-05-26
下一篇:
Python inspector.save_report函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap