This article collects typical usage examples of the Python utils.Regex.Regex class. If you have been wondering what the Python Regex class does, how to call it, or what real-world usage looks like, the curated class code examples below may help.
A total of 20 code examples of the Regex class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps our system recommend better Python code examples.
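The Regex class itself is not reproduced on this page. Judging from how it is invoked in the examples below, a minimal sketch of its interface might look like the following; the method bodies are assumptions inferred from the call sites (replaceData, reduceNewLine, reduceBlankSpace, getSearchedData, getAllSearchedData), not the original implementation.

import re

class Regex:
    # Minimal sketch inferred from the call sites below -- not the original
    # utils.Regex.Regex implementation.
    def replaceData(self, pattern, replacement, data):
        # Replace every match of `pattern` in `data` with `replacement`.
        return re.sub(pattern, replacement, data)

    def reduceNewLine(self, data):
        # Collapse runs of newlines into a single space.
        return re.sub(r'\n+', ' ', data)

    def reduceBlankSpace(self, data):
        # Collapse runs of whitespace into a single space.
        return re.sub(r'\s+', ' ', data)

    def getSearchedData(self, pattern, data):
        # Return the first captured group of the first match, or None if nothing matches.
        match = re.search(pattern, data)
        return match.group(1) if match else None

    def getAllSearchedData(self, pattern, data):
        # Return all matches; tuples when the pattern defines multiple groups.
        return re.findall(pattern, data)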
Example 1: __init__
class TopsyScrapper:
    isFinished = False

    def __init__(self, filename):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'http://topsy.com/s?'
        self.csvWriter = Csv('topsy.csv')
        csvDataHeader = ['Keyword', 'Tweets in last 30 days', 'Topsy Sentiment Score', ' Date of scrape']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        try:
            file = open(self.filename, 'rb')
            for line in file.readlines():
                if self.isFinished: return
                line = self.regex.replaceData('\r+', '', line)
                line = self.regex.reduceNewLine(line)
                line = self.regex.reduceBlankSpace(line)
                line = line.strip()
                if len(line) > 0:
                    params = urllib.urlencode({'q': line, 'window': 'm', 'type': 'tweet'})
                    url = self.url + params
                    self.scrapBrowserData(url, line)
        except Exception, x:
            print x
Developer: tuly, Project: Python-Internet-Harvesting, Lines of code: 33, Source: TopsyScrapper.py
Example 2: __init__
class GoogleFinanceScrapper:
    isFinished = False

    def __init__(self, filename):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'https://www.google.com/finance?'
        self.main_url = 'https://www.google.com'
        self.csvWriter = Csv('google_finance.csv')
        csvDataHeader = ['Ticker Symbol', 'Quarter End', 'Revenue', 'Total Revenue', 'Date of Scrape']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        try:
            file = open(self.filename, 'rb')
            for line in file.readlines():
                if self.isFinished: return
                line = self.regex.replaceData('\r+', '', line)
                line = self.regex.reduceNewLine(line)
                line = self.regex.reduceBlankSpace(line)
                line = line.strip()
                params = urllib.urlencode({'q': line})
                url = self.url + params
                self.scrapBykeyword(url, line)
        except Exception, x:
            print x
            self.logger.error('Error: ' + x.message)
Developer: tuly, Project: Python-Internet-Harvesting, Lines of code: 34, Source: GoogleFinanceScrapper.py
Example 3: NisbetProduct
class NisbetProduct(QtCore.QThread):
    scrapProductData = QtCore.pyqtSignal(object)
    stopThread = QtCore.pyqtSignal(int)

    def __init__(self):
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.totalProducts = 0
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow('nisbets.csv', 0)
        self.csvWriter = Csv('nisbets.csv')
        self.mainUrl = 'http://www.nisbets.co.uk'
        csvHeaderList = ['URL', 'Product Code', 'Product Technical Specifications', 'Product Name', 'Brand',
                         'Product Price', 'Product Short Description',
                         'Product Long Description', 'Image File Name', 'User Manual File Name',
                         'Exploded View File Name', 'Spares Code', 'Accessories', 'Product Status', 'Category1',
                         'Category2', 'Category3',
                         'Category4']
        if 'URL' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList[0])
        self.utils = Utils()

    def run(self):
        self.scrapData()

    def stop(self):
        self.isExiting = True

    def scrapData(self):
        if self.isExiting: return
        self.scrapProductData.emit('<font color=green><b>Main URL: </b>%s</font>' % self.mainUrl)
        self.logger.debug('===== URL [' + self.mainUrl + '] =====')
        data = self.spider.fetchData(self.mainUrl)
        if data and len(str(data).strip()) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category1Chunk = self.regex.getAllSearchedData('(?i)<li id="li-id-\d+">(.*?)</ul> </li>', data)
            if category1Chunk and len(str(category1Chunk).strip()) > 0:
                i = 0
                for category1Data in category1Chunk:
                    category1 = self.regex.getSearchedData('(?i)<a href="[^"]*">([^<]*)</a>', category1Data)
                    category2Chunk = self.regex.getAllSearchedData('(?i)<li><a href="([^"]*)">([^<]*)</a>',
                                                                   category1Data)
                    if category2Chunk and len(str(category2Chunk).strip()) > 0:
                        for category2Data in category2Chunk:
                            try:
                                self.scrapCategory2Data(self.mainUrl + category2Data[0], category1, category2Data[1])
                            except Exception, x:
                                self.logger.error(x)
        self.scrapProductData.emit('<font color=red><b>Finish Scraping Product data from %s</b></font>' % self.mainUrl)
Developer: rabbicse, Project: Python-Nisbets-Scrapper, Lines of code: 55, Source: NisbetProduct.py
Example 4: downloadFile
def downloadFile(self, url, downloadPath, proxyHandler=None, notifier=None, retry=0):
    try:
        if os.path.exists(downloadPath) and os.path.getsize(downloadPath):
            if notifier is not None:
                notifier.emit('<font color=red><b>Image file already exists. Skip downloading file.</b></font>')
            return
        notifier.emit(('<font color=blue><b>Image URL: %s</b></font>' % url))
        regex = Regex()
        opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                      urllib2.HTTPHandler(debuglevel=0),
                                      urllib2.HTTPSHandler(debuglevel=0))
        opener.addheaders = [
            config.USER_AGENT,
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Connection', 'keep-alive')]
        if proxyHandler is not None:
            opener.add_handler(proxyHandler)
        resp = urllib2.urlopen(url, timeout=30)
        contentLength = resp.info()['Content-Length']
        contentLength = regex.getSearchedData('(?i)^(\d+)', contentLength)
        totalSize = float(contentLength)
        directory = os.path.dirname(downloadPath)
        if not os.path.exists(directory):
            os.makedirs(directory)
        dl_file = open(downloadPath, 'wb')
        currentSize = 0
        CHUNK_SIZE = 32768
        while True:
            data = resp.read(CHUNK_SIZE)
            if not data:
                break
            currentSize += len(data)
            dl_file.write(data)
            msg = '=====> ' + str(round(float(currentSize * 100) / totalSize, 2)) + \
                  '% of ' + str(totalSize / (1024)) + ' KB'
            print('=====> ' + str(round(float(currentSize * 100) / totalSize, 2)) + '% of ' + str(
                totalSize) + ' bytes')
            if notifier is not None:
                notifier.emit('<font color=blue><b>%s</b></font>' % msg)
            if currentSize >= totalSize:
                dl_file.close()
                return True
    except Exception, x:
        print x
        notifier.emit(('<font color=red><b>Error Download Image URL: %s</b></font>' % url))
        if retry < 1:
            notifier.emit('<font color=black><b>Will retry after 5 seconds.</b></font>')
            time.sleep(5)
            notifier.emit('<font color=black><b>Retry...</b></font>')
            self.downloadFile(url, downloadPath, proxyHandler, notifier, retry + 1)
        else:
            notifier.emit('<font color=red><b>Failed to download after maximum retry.</b></font>')
Developer: tuly, Project: Python-Ebay-Details, Lines of code: 54, Source: Spider.py
Example 5: SaraivaScrapper
class SaraivaScrapper(QThread):
    notifySaraiva = pyqtSignal(object)

    def __init__(self, urlList, category, htmlTag, replaceTag):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.urlList = urlList
        self.category = category
        self.htmlTag = self.regex.replaceData('\r+', '', htmlTag)
        self.htmlTag = self.regex.replaceData('\n+', ' ', self.htmlTag)
        self.htmlTag = self.regex.replaceData('\s+', ' ', self.htmlTag)
        self.htmlTag = self.regex.replaceData(r'\"+', '\"', self.htmlTag)
        self.replaceTag = replaceTag
        self.csvWriter = Csv(category + '.csv')
        csvDataHeader = ['Link', 'Name', 'Subtitle', 'Price', 'Synopsis and Characteristics', 'Picture']
        self.csvWriter.writeCsvRow(csvDataHeader)
        self.mainUrl = 'http://busca.livrariasaraiva.com.br'
        self.scrapUrl = None
        self.dbHelper = DbHelper('saraiva.db')
        self.dbHelper.createTable(category)
        self.total = self.dbHelper.getTotalProduct(category)

    def run(self, retry=0):
        try:
            if self.urlList is not None and len(self.urlList):
                for url in self.urlList:
                    if len(url) > 0:
                        url = self.regex.replaceData('(?i)\r', '', url)
                        url = self.regex.replaceData('(?i)\n', '', url)
                        self.notifySaraiva.emit('<font color=green><b>Saraiva Main URL: %s</b></font>' % url)
                        paginationUrl, self.maxRecords = self.reformatUrl(url)
                        self.notifySaraiva.emit(
                            '<font color=black><b>Total Records: %s</b></font>' % str(self.maxRecords))
                        print 'Max records: ', self.maxRecords
                        print 'URL: ' + str(paginationUrl)
                        sortList = ['&isort=globalpop', '&isort=best', '&isort=title', '&isort=title+rev',
                                    '&isort=price+rev',
                                    '&isort=price', '&isort=date+rev']
                        for sort in sortList:
                            self.scrapResults(paginationUrl, sort)
            self.notifySaraiva.emit('<font color=red><b>Saraiva Data Scraping finished.</b></font>')
        except Exception, x:
            print x.message
            self.logger.error('Exception at run: ', x.message)
            if retry < 5:
                self.run(retry + 1)
Developer: rabbicse, Project: Python-Saraiva-Scrapper, Lines of code: 49, Source: SaraivaScrapper.py
Example 6: PaodeacucarScrapper
class PaodeacucarScrapper(QThread):
    notifyPaode = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.mainUrl = 'http://www.paodeacucar.com.br/'
        self.url = 'http://www.paodeacucar.com.br/'
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow('paodeacucar.csv', 4)
        self.csvWriter = Csv('paodeacucar.csv')
        csvDataHeader = ['SKU', 'Category', 'Subcategory', 'Name', 'URL', 'URL Image', 'Details',
                         'Nutrients Table html code', 'Price from, 28/abr/14', '28/abr/14']
        if 'URL' not in self.dupCsvRows:
            self.dupCsvRows.append(csvDataHeader)
            self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        self.scrapData()

    def scrapData(self):
        try:
            print 'Main URL: ', self.url
            self.notifyPaode.emit(('<font color=green><b>Main URL: %s</b></font>' % self.url))
            data = self.spider.fetchData(self.url)
            if data and len(data) > 0:
                data = self.regex.reduceNewLine(data)
                data = self.regex.reduceBlankSpace(data)
                soup = BeautifulSoup(data)
                categories = soup.find('nav', class_='items-wrapper').find_all('li', class_=re.compile('\s*item\s*'))
                print 'Total Categories: ', len(categories)
                self.notifyPaode.emit(('<font color=black><b>Total Categories: %s</b></font>' % str(len(categories))))
                for category in categories:
                    if category.a is not None:
                        submenu_target = self.regex.replaceData('#', '', category.a.get('data-target'))
                        sub_categories = soup.find('ul', id=submenu_target).find_all('li', class_='item')
                        print 'Total Sub Categories: ', len(sub_categories)
                        self.notifyPaode.emit(('<font color=black><b>Total Subcategories: %s</b></font>' % str(len(sub_categories))))
                        for sub_category in sub_categories:
                            sub_category_label = sub_category.find('span', class_='label').text
                            sub_category_url = sub_category.a.get('href') if sub_category.a is not None else 'N/A'
                            self.scrapItems(sub_category_url, category.text, sub_category_label)
        except Exception, x:
            self.logger.error(x.message)
            print x
Developer: tuly, Project: Python-Paodeacucar-Spider, Lines of code: 48, Source: PaodeacucarScrapper.py
Example 7: __init__
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.mainUrl = 'http://www.ebags.com'
    self.url = 'http://www.ebags.com/brands'
Developer: rabbicse, Project: python-spider-bs4, Lines of code: 7, Source: EbagsScrapper.py
Example 8: __init__
def __init__(self, input_file, output_file):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.input_file = input_file
    self.output_file = output_file
Developer: HughP, Project: Python-WP-Login, Lines of code: 7, Source: WpScrapper.py
Example 9: __init__
def __init__(self):
    QtCore.QThread.__init__(self)
    self.isExiting = False
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow("nisbets.csv", 0)
    self.csvWriter = Csv("nisbets.csv")
    self.mainUrl = "http://www.nisbets.co.uk"
    csvHeaderList = [
        "URL",
        "Product Code",
        "Product Technical Specifications",
        "Product Name",
        "Brand",
        "Product Price",
        "Product Short Description",
        "Product Long Description",
        "Image File Name",
        "User Manual File Name",
        "Exploded View File Name",
        "Spares Code",
        "Accessories",
        "Product Status",
        "Category1",
        "Category2",
        "Category3",
        "Category4",
    ]
    if "URL" not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(csvHeaderList)
        self.dupCsvRows.append(csvHeaderList[0])
    self.utils = Utils()
Developer: rabbicse, Project: Python-LinkedIn-Scrapper, Lines of code: 34, Source: NisbetProduct.py
Example 10: CsTest
class CsTest(QThread):
    notifyProduct = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        self.dupCsvRows0 = dupCsvReader.readCsvRow('cs_product.csv', 0)
        self.dupCsvRows = dupCsvReader.readCsvRow('cs_product.csv', 1)
        self.csvWriter = Csv('cs_product.csv')
        # self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
        self.mainUrl = 'http://www.cs-catering-equipment.co.uk/brands'
        self.utils = Utils()
        if 'Product Code' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(
                ['URL', 'Product Code', 'Product Name', 'Manufacturer', 'List Price', 'Product Price', 'Discount',
                 'Product Short Description', 'Product Long Description', 'Product Technical Specifications',
                 'Warranty', 'Delivery', 'Product Image',
                 'Category 1', 'Category 2', 'Category 3', 'Category 4', 'Brand Image'])
        self.totalProducts = len(self.dupCsvRows)

    def run(self):
        self.scrapBrands()
        self.notifyProduct.emit('<font color=red><b>Finished Scraping All Brands.</b></font>')

    def scrapBrands(self):
        self.notifyProduct.emit('<font color=green><b>Main URL: %s<b></font>' % self.mainUrl)
        self.notifyProduct.emit('<b>Try To scrap All Brands.<b>')
        data = self.spider.fetchData(self.mainUrl)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            brandChunks = self.regex.getAllSearchedData('(?i)<div class="man-group man-group-[a-z]">(.*?)</div>', data)
            if brandChunks and len(brandChunks) > 0:
                for brandChunk in brandChunks:
                    brands = self.regex.getAllSearchedData('(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', brandChunk)
                    self.notifyProduct.emit('<b>Total Brands Found: %s<b>' % str(len(brands)))
                    if brands and len(brands) > 0:
                        for brand in brands:
                            try:
                                self.scrapBrandInfo(brand[0], 'Shop By Brand', brand[1])
                            except Exception, x:
                                self.logger.error(x)
Developer: rabbicse, Project: Python-CsCatering-scrapper, Lines of code: 47, Source: CsTest.py
Example 11: CsBrands
class CsBrands(QThread):
    notifyBrand = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow("cs_Brands.csv")
        self.csvWriter = Csv("cs_Brands.csv")
        self.mainUrl = "http://www.cs-catering-equipment.co.uk/brands"
        self.isExiting = False
        headerData = [
            "URL",
            "Parent Category",
            "Brand Category",
            "Brand Description",
            "Image File",
            "Product Codes in this category",
        ]
        if headerData not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(headerData)

    def run(self):
        self.scrapBrands()
        self.notifyBrand.emit("<font color=red><b>Finished Scraping All Brands.</b></font>")

    def scrapBrands(self):
        self.notifyBrand.emit("<font color=green><b>Main URL: %s<b></font>" % self.mainUrl)
        self.notifyBrand.emit("<b>Try To scrap All Brands.<b>")
        data = self.spider.fetchData(self.mainUrl)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            brandChunks = self.regex.getAllSearchedData('(?i)<div class="man-group man-group-[a-z]">(.*?)</div>', data)
            if brandChunks and len(brandChunks) > 0:
                for brandChunk in brandChunks:
                    brands = self.regex.getAllSearchedData('(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', brandChunk)
                    self.notifyBrand.emit("<b>Total Brands Found: %s<b>" % str(len(brands)))
                    if brands and len(brands) > 0:
                        for brand in brands:
                            try:
                                self.scrapBrandInfo(brand[0], "Shop By Brand", brand[1])
                            except Exception, x:
                                self.logger.error(x)
Developer: rabbicse, Project: Python-Bertos-Scrapper, Lines of code: 47, Source: CsBrands.py
Example 12: __init__
def __init__(self, spider, memberList, subject, message):
    QThread.__init__(self)
    # self.spider = Spider()
    self.spider = spider
    self.regex = Regex()
    self.memberList = memberList
    self.subject = unicode(subject)
    self.message = unicode(message)
Developer: rabbicse, Project: Python-LinkedIn-Scrapper, Lines of code: 8, Source: MyLinkedInMessage.py
Example 13: __init__
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.csvWriter = Csv('nisbets.csv')
    self.mainUrl = 'http://www.nisbets.co.uk'
    csvHeaderList = ['Category', 'Product Image Url', 'Product Code', 'Product Name', 'Price']
    self.csvWriter.writeCsvRow(csvHeaderList)
Developer: rabbicse, Project: Python-Bertos-Scrapper, Lines of code: 8, Source: Nisbets.py
Example 14: AmazonScrapper
class AmazonScrapper():
    def __init__(self, url):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.url = url
        self.base_product_url = 'http://www.amazon.com/dp/'
        self.base_image_url = 'http://ecx.images-amazon.com/images/I/'
        self.csvWriter = Csv('amazon.csv')
        csvDataHeader = ['URL', 'HTML Path', 'Image URLS']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def scrapData(self):
        try:
            host = ('Host', 'www.amazon.com')
            data = self.spider.fetchData(self.url, host=host)
            if data:
                data = self.regex.reduceNewLine(data)
                data = self.regex.reduceBlankSpace(data)
                searchParams = self.regex.getSearchedData('(?i)var searchParams = {([^\}]*)}', data)
                searchParams = searchParams.split(',')
                seller = ''
                marketPlaceId = ''
                useMYI = ''
                for searchParam in searchParams:
                    searchParam = self.regex.reduceBlankSpace(searchParam)
                    searchParam = self.regex.replaceData('\'', '', searchParam)
                    if searchParam.startswith('seller'):
                        seller = searchParam.split(':')[1].strip()
                        seller = seller.decode('string-escape')
                    if searchParam.startswith('marketplaceID'):
                        marketPlaceId = searchParam.split(':')[1].strip()
                        marketPlaceId = marketPlaceId.decode('string-escape')
                    if searchParam.startswith('useMYI'):
                        useMYI = searchParam.split(':')[1].strip()
                        useMYI = useMYI.decode('string-escape')
                params = {'seller': seller,
                          'marketPlaceId': marketPlaceId,
                          'useMYI': useMYI}
                ajax_url = 'http://www.amazon.com/gp/aag/ajax/productWidget.html'
                self.scrapAjaxPage(ajax_url, params, host)
        except Exception, x:
            print x
Developer: rabbicse, Project: python-spider-bs4, Lines of code: 45, Source: AmazonScrapper.py
Example 15: __init__
def __init__(self, urllist):
    QThread.__init__(self)
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    print urllist
    self.urllist = urllist
    self.csv = Csv('scrapper.csv')
Developer: tuly, Project: Python-Ebay-Details, Lines of code: 9, Source: Scrapper.py
Example 16: AmazonScrapper
class AmazonScrapper(QThread):
    notifyAmazon = pyqtSignal(object)

    def __init__(self, urlList, category):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.urlList = urlList
        self.category = category
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow(category + '.csv')
        self.csvWriter = Csv(category + '.csv')
        csvDataHeader = ['SKU', 'Title', 'Sub Title', 'Price', 'Shipping Weight', 'Image URL']
        if csvDataHeader not in self.dupCsvRows:
            self.dupCsvRows.append(csvDataHeader)
            self.csvWriter.writeCsvRow(csvDataHeader)
        self.mainUrl = 'http://www.amazon.com'
        self.scrapUrl = None
        self.dbHelper = DbHelper('amazon.db')
        self.dbHelper.createTable(category)
        self.total = self.dbHelper.getTotalProduct(category)

    def run(self, retry=0):
        try:
            # self.scrapProductDetail(
            #     'http://www.amazon.com/Casio-MRW-S300H-8BVCF-Solar-Powered-Analog/dp/B00ELALKH2/ref=sr_1_544/184-7248556-2619812?s=watches&ie=UTF8&qid=1397580509&sr=1-544')
            # return
            if self.urlList is not None and len(self.urlList):
                for url in self.urlList:
                    if len(url) > 0:
                        url = self.regex.replaceData('(?i)\r', '', url)
                        url = self.regex.replaceData('(?i)\n', '', url)
                        self.notifyAmazon.emit('<font color=green><b>Amazon Main URL: %s</b></font>' % url)
                        imUrl = None
                        retry = 0
                        while imUrl is None and retry < 4:
                            imUrl = self.reformatUrl(url)
                            retry += 1
                        if imUrl is None:
                            imUrl = url
                        self.total = 0
                        print 'URL: ' + str(imUrl)
                        sortList = ['relevance-fs-browse-rank', 'price', '-price', 'reviewrank_authority',
                                    'date-desc-rank']
                        for sort in sortList:
                            self.scrapReformatData(imUrl, sort)
                        self.notifyAmazon.emit(
                            '<font color=red><b>Finish data for Amazon Main URL: %s</b></font><br /><br />' % url)
            self.notifyAmazon.emit('<font color=red><b>Amazon Data Scraping finished.</b></font>')
        except Exception, x:
            print x.message
            self.logger.error('Exception at run: ', x.message)
            if retry < 5:
                self.run(retry + 1)
Developer: rabbicse, Project: Python-Amazon-Scrapper, Lines of code: 56, Source: AmazonScrapper.py
Example 17: __init__
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.loginUrl = 'http://www.v4.penta-transaction.com/telematica_v4/login_ing.jsp'
    self.username = 'web9501201'
    self.password = '784693'
    self.collectionUrl = 'http://www.trggroup.net/victorinox/index.php?p=124'
    self.mainUrl = 'http://www.penta-transaction.com'
Developer: rabbicse, Project: python-spider-bs4, Lines of code: 10, Source: PentaTransaction.py
Example 18: __init__
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.browser = BrowserUtil()
    self.regex = Regex()
    self.utils = Utils()
    self.csvHeader = ['Category', 'Sub Category 1', 'Sub Category 2', 'Product Code', 'Product Name',
                      'Product ShortName', 'Product Description', 'List Price', 'Vendor Price', 'Availability',
                      'Power', 'Size', 'KW', 'Weight(kg)', 'Other Tech', 'Pdf File', 'Image File']
    self.totalProducts = 0
Developer: TAntonio, Project: python-spider-envclearance, Lines of code: 10, Source: WebTable.py
Example 19: PentaTransaction
class PentaTransaction():
    def __init__(self):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.loginUrl = 'http://www.v4.penta-transaction.com/telematica_v4/login_ing.jsp'
        self.username = 'web9501201'
        self.password = '784693'
        self.collectionUrl = 'http://www.trggroup.net/victorinox/index.php?p=124'
        self.mainUrl = 'http://www.penta-transaction.com'

    def scrapData(self):
        self.onLogin()
        data = self.spider.fetchData(self.mainUrl)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            print data

    def onLogin(self):
        '''
        Credentials are:
        action   login_access
        i
        p
        password sdfsdf
        username sdfsdf
        '''
        try:
            print self.loginUrl
            loginCredentials = {'username': self.username,
                                'password': self.password}
            loginData = self.spider.login(self.loginUrl, loginCredentials)
            if loginData and len(loginData) > 0:
                loginData = self.regex.reduceNewLine(loginData)
                loginData = self.regex.reduceBlankSpace(loginData)
                print 'Login: '
                print loginData
        except Exception, x:
            print 'There was an error when login'
            return False
Developer: rabbicse, Project: python-spider-bs4, Lines of code: 42, Source: PentaTransaction.py
Example 20: __init__
def __init__(self):
    QObject.__init__(self)
    self.regex = Regex()
    self.title = ''
    self.webView = QWebView()
    self.webView.settings().setAttribute(QWebSettings.AutoLoadImages, True)
    self.webView.settings().setAttribute(QWebSettings.JavascriptEnabled, True)
    self.webView.settings().setAttribute(QWebSettings.PluginsEnabled, True)
    self.webView.settings().setAttribute(QWebSettings.DeveloperExtrasEnabled, True)
    self.pdfPrinter = QPrinter()
    self.webView.loadFinished.connect(self.convertToPdf)
Developer: rabbicse, Project: Python-Web2Pdf, Lines of code: 11, Source: WebPageToPdf.py
Note: The utils.Regex.Regex class examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by various developers, and the source code copyright belongs to the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.