/**
 * Extract the request path (plus query string, when present) from a URL.
 *
 * Line breaks are stripped from the URL and spaces are percent-encoded before
 * the URL is checked with getDomain() and handed to the Pdp URL parser.
 *
 * @param string $url The URL to take the path from.
 *
 * @return mixed The path, followed by "?query" when a query string exists
 *               ("/" when the parsed result is empty); false when $url is
 *               empty or getDomain() does not accept it.
 */
function getUri($url)
{
    // Nothing to work with.
    if (empty($url)) {
        return false;
    }

    // Remove CR/LF characters, then percent-encode spaces.
    $cleanUrl = preg_replace("/[\n\r]/", '', $url);
    $cleanUrl = str_replace(' ', '%20', $cleanUrl);

    // Give up when the domain part does not validate.
    if (getDomain($cleanUrl) == false) {
        return false;
    }

    $suffixList = (new Pdp\PublicSuffixListManager())->getList();
    $parser = new Pdp\Parser($suffixList);
    $parts = $parser->parseUrl($cleanUrl)->toArray();

    // Glue the query string back onto the path when there is one.
    $query = empty($parts['query']) ? '' : '?' . $parts['query'];
    $uri = $parts['path'] . $query;

    // Fall back to the root path when nothing was found.
    if (empty($uri)) {
        $uri = '/';
    }

    // Percent-encode any space left in the resulting path.
    return str_replace(' ', '%20', $uri);
}
/**
 * Parse the XML report carried in the mail body into incidents.
 *
 * Each <list> element names its feed through the "type" attribute. For feeds
 * that are known and enabled, every <url_info> child is turned into one
 * incident, provided the collected report passes hasRequiredFields().
 *
 * @return array Returns array with failed or success data
 *               (See parser-common/src/Parser.php) for more info.
 */
public function parse()
{
    $xml = simplexml_load_string($this->parsedMail->getMessageBody());

    // simplexml_load_string() returns false for malformed XML; calling
    // attributes() on that value would be a fatal error, so stop here.
    // NOTE(review): assumes the parser base class offers failed($message)
    // next to success() - confirm.
    if ($xml === false) {
        return $this->failed('Unable to parse the message body as XML');
    }

    // The date attribute of the root element is used for every incident.
    $timestamp = strtotime((string) $xml->attributes()->date);

    foreach ($xml->list as $reports) {
        $this->feedName = (string) $reports->attributes()->type;

        // If feed is known and enabled, validate data and save report
        if ($this->isKnownFeed() && $this->isEnabledFeed()) {
            foreach ($reports->url_info as $url_info) {
                $url = (string) $url_info->attributes()->url;
                $ip = (string) $url_info->attributes()->ip;
                $urlData = getUrlData($url);

                if (filter_var($ip, FILTER_VALIDATE_IP) === false) {
                    // The report carries no valid IP address
                    if (!empty($urlData['host'])
                        && filter_var($urlData['host'], FILTER_VALIDATE_IP) !== false
                    ) {
                        // Hostname is an IP address
                        $ip = $urlData['host'];
                    } else {
                        // No IP address yet: try resolving the host name
                        $ip = @gethostbyname($urlData['host']);

                        // gethostbyname() hands back its input when the lookup
                        // fails; use localhost in that case
                        $ip = $ip == $urlData['host'] ? '127.0.0.1' : $ip;
                    }
                }

                $report = [
                    'domain' => getDomain($url),
                    'uri' => getUri($url),
                    'category' => config("{$this->configBase}.feeds.{$this->feedName}.category"),
                ];

                // Sanity check
                if ($this->hasRequiredFields($report) === true) {
                    // incident has all requirements met, filter and add!
                    $report = $this->applyFilters($report);

                    $incident = new Incident();
                    $incident->source = config("{$this->configBase}.parser.name");
                    $incident->source_id = false;
                    $incident->ip = $ip;
                    $incident->domain = $report['domain'];
                    $incident->class = config("{$this->configBase}.feeds.{$this->feedName}.class");
                    $incident->type = config("{$this->configBase}.feeds.{$this->feedName}.type");
                    $incident->timestamp = $timestamp;
                    $incident->information = json_encode(array_merge($urlData, $report));

                    $this->incidents[] = $incident;
                }
            }
        }
    }

    return $this->success();
}
/**
 * Parse the ARF report of the mail into a single incident.
 *
 * The report text is split into "Key: value" lines. When the report passes
 * hasRequiredFields(), the user and domain found in the smtp.auth part of the
 * Authentication-Results field are added as Source-User / Source-Domain and
 * one incident is stored.
 *
 * @return array Returns array with failed or success data
 *               (See parser-common/src/Parser.php) for more info.
 */
public function parse()
{
    // NOTE(review): this only skips parsing when arfMail is exactly true, yet
    // the body below reads it as an array - confirm the intended sentinel.
    if ($this->arfMail !== true) {
        $this->feedName = 'default';

        // If feed is known and enabled, validate data and save report
        if ($this->isKnownFeed() && $this->isEnabledFeed()) {
            // To get some more consistency, remove "\r" from the report.
            $this->arfMail['report'] = str_replace("\r", "", $this->arfMail['report']);

            // Build up the report. The value group is lazy so that the
            // following "[ ]*" really strips trailing spaces; a greedy group
            // would swallow them and leave them in the value.
            preg_match_all("/([\\w\\-]+): (.*?)[ ]*\n/m", $this->arfMail['report'], $matches);
            $report = array_combine($matches[1], $matches[2]);

            // Sanity check
            if ($this->hasRequiredFields($report) === true) {
                // Grab the domain and user from the authentication results for
                // contact lookup (byDomain). The field may be absent, and the
                // dot in "smtp.auth" has to be matched literally.
                if (isset($report['Authentication-Results'])
                    && preg_match(
                        '/smtp\.auth=(?<user>.*)@(?<domain>.*)/m',
                        $report['Authentication-Results'],
                        $auth
                    ) === 1
                ) {
                    $report['Source-User'] = $auth['user'];
                    $report['Source-Domain'] = $auth['domain'];
                }

                ksort($report);

                // incident has all requirements met, filter and add!
                $report = $this->applyFilters($report);

                $incident = new Incident();
                $incident->source = config("{$this->configBase}.parser.name");
                $incident->source_id = false;
                $incident->ip = $report['Source-IP'];
                $incident->domain = empty($report['Source-Domain'])
                    ? false
                    : getDomain($report['Source-Domain']);
                $incident->class = config("{$this->configBase}.feeds.{$this->feedName}.class");
                $incident->type = config("{$this->configBase}.feeds.{$this->feedName}.type");
                $incident->timestamp = strtotime($report['Arrival-Date']);
                $incident->information = json_encode($report);

                $this->incidents[] = $incident;
            }
        }
    }

    return $this->success();
}
/**
* Parse attachments
* @return array Returns array with failed or success data
* (See parser-common/src/Parser.php) for more info.
*/
public function parse()
{
// Validate user based regex
try {
preg_match(config("{$this->configBase}.parser.file_regex"), '', $matches);
} catch (\Exception $e) {
$this->warningCount++;
return $this->failed('Configuration error in the regular expression');
}
foreach ($this->parsedMail->getAttachments() as $attachment) {
if (strpos($attachment->filename, '.zip') !== false && $attachment->contentType == 'application/octet-stream') {
$zip = new Zipper();
if (!$this->createWorkingDir()) {
return $this->failed("Unable to create working directory");
}
file_put_contents($this->tempPath . $attachment->filename, $attachment->getContent());
$zip->zip($this->tempPath . $attachment->filename);
$zip->extractTo($this->tempPath);
foreach ($zip->listFiles() as $index => $compressedFile) {
if (strpos($compressedFile, '.csv') !== false) {
// For each CSV file we find, we are going to do magic (however they usually only send 1 zip)
if (preg_match(config("{$this->configBase}.parser.file_regex"), $compressedFile, $matches)) {
$this->feedName = $matches[1];
// If feed is known and enabled, validate data and save report
if ($this->isKnownFeed() && $this->isEnabledFeed()) {
$csvReports = new Reader\CsvReader(new SplFileObject($this->tempPath . $compressedFile));
$csvReports->setHeaderRowNumber(0);
foreach ($csvReports as $report) {
// Handle field mappings first
$aliasses = config("{$this->configBase}.feeds.{$this->feedName}.aliasses");
if (is_array($aliasses)) {
foreach ($aliasses as $alias => $real) {
if (array_key_exists($alias, $report)) {
$report[$real] = $report[$alias];
unset($report[$alias]);
}
}
}
/*
* Legacy 3.x fix for migrations.
*
* This resolves shadowserver errors where the CSV was send in duplicate resulting
* in the header fields being used as data. If the header is detected the row can
* be skipped safely
*/
if ($report['ip'] === 'ip') {
continue;
}
// Sanity check
if ($this->hasRequiredFields($report) === true) {
// incident has all requirements met, filter and add!
$report = $this->applyFilters($report);
$incident = new Incident();
$incident->source = config("{$this->configBase}.parser.name");
$incident->source_id = false;
$incident->ip = $report['ip'];
$incident->domain = false;
$incident->class = config("{$this->configBase}.feeds.{$this->feedName}.class");
$incident->type = config("{$this->configBase}.feeds.{$this->feedName}.type");
$incident->timestamp = strtotime($report['timestamp']);
$incident->information = json_encode($report);
// some rows have a domain, which is an optional column we want to register
switch ($this->feedName) {
case "spam_url":
if (isset($report['url'])) {
$incident->domain = getDomain($report['url']);
}
break;
case "ssl_scan":
if (isset($report['subject_common_name'])) {
/*
* Common name does not add http://, but that is required for
* the domain helper check so lets add it manually
*/
$testurl = "http://{$report['subject_common_name']}";
$incident->domain = getDomain($testurl);
}
break;
case "compromised_website":
if (isset($report['http_host'])) {
$incident->domain = getDomain($report['http_host']);
}
break;
}
$this->incidents[] = $incident;
}
//End hasRequired fields
}
// End foreach report loop
}
// End isKnown & isEnabled
} else {
// Pregmatch failed to get feedName from attachment
$this->warningCount++;
}
//......... part of the code is omitted here .........
/**
 * Look up a title and domain for a URL and append them to the session link list.
 *
 * Sets the session name to "pubTool" and sends a P3P header before doing
 * anything else. When verifyURL() rejects the URL the script is terminated
 * with an HTML error message.
 *
 * @param mixed $url     The URL to fetch a readable title from.
 * @param mixed $comment A comment stored alongside the link.
 * @return void Returns nothing; an entry with the keys url, title, domain and
 *              comment (all cast to string) is appended to $_SESSION["linkList"].
 */
function parseURL($url, $comment)
{
    session_name('pubTool');
    header('P3P: CP="CAO PSA OUR"');

    // Refuse anything verifyURL() does not accept.
    if (!verifyURL($url)) {
        die("That doesn't look like an URL to me. Click <a href=\"index.php\">here</a> and try again!");
    }

    // Turn the raw input into a URI object and run it through sanitizeURL().
    $cleanUrl = sanitizeURL(HttpUri::createFromString($url));

    // Resolve title and domain before the entry is assembled.
    $pageTitle = getTitle($cleanUrl);
    $host = getDomain($cleanUrl);

    $entry = [
        'url' => (string) $cleanUrl,
        'title' => (string) $pageTitle,
        'domain' => (string) $host,
        'comment' => (string) $comment,
    ];

    $_SESSION["linkList"][] = $entry;
}
请发表评论