This commit is contained in:
2024-10-14 00:08:40 +02:00
parent dbfba56f66
commit 1462d52e13
4572 changed files with 2658864 additions and 0 deletions

View File

@@ -0,0 +1 @@
http://a.a.a

View File

@@ -0,0 +1,6 @@
# Crawler settings. This file is pulled in with require, so the final
# statement must evaluate to a true value.

# Recursion depth handed to the crawler for every whitelist entry.
our $M_C_DEEP = 5;

# Names of the files holding the start URLs and the exclusion patterns.
our $M_C_WHITELIST = 'whitelist';
our $M_C_BLACKLIST = 'blacklist';

# Maximum link age in seconds: 30 days, i.e. roughly one month.
our $M_C_LINKMAXAGE = 30 * 24 * 60 * 60;

View File

@@ -0,0 +1,6 @@
# Database connection settings. This file is pulled in with require, so the
# final statement must evaluate to a true value.
# NOTE(review): credentials are stored in plain text here - confirm this file
# is kept out of any publicly served directory.
our $M_DB_HOST = 'localhost';
our $M_DB_USER = '1_mose';
our $M_DB_PASS = '1_mose';

View File

@@ -0,0 +1,6 @@
# Settings for the search front end. This file is pulled in with require, so
# the final statement must evaluate to a true value.

# Base URL that the logo file name is appended to.
our $M_BASE_URL = 'http://mose.andregeissler.de';

# Logo image shown on the start page, the address it links to, and its
# dimensions as used in the width/height attributes of the <img> tag.
our $M_HOME_LOGO        = 'mose.jpg';
our $M_HOME_LOGO_TARGET = 'http://mose.andregeissler.de';
our $M_HOME_LOGO_WIDTH  = '100';
our $M_HOME_LOGO_HEIGHT = '100';

View File

@@ -0,0 +1,105 @@
#!/usr/bin/perl
# Crawler entry point: reads the start URLs from the whitelist file and
# crawls each of them to the configured depth, skipping anything that matches
# an entry of the blacklist file.
use strict;
use warnings;
use WWW::Mechanize;
use DBI;
our ($M_C_DEEP, $M_C_WHITELIST, $M_C_BLACKLIST, $M_C_LINKMAXAGE);
our ($M_DB_HOST, $M_DB_USER, $M_DB_PASS);

# The explicit "./" is required: since Perl 5.26 the current directory is no
# longer part of @INC, so a bare file name would not be found.
require "./config.crawl.pl";
require "./config.global.pl";

# NOTE(review): this switches off TLS hostname verification for LWP. It lets
# the crawler read hosts with mismatching certificates but also removes
# protection against man-in-the-middle attacks - confirm this is intended.
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;

# Without a whitelist there is nothing to crawl, so fail loudly.
open my $fh_whitelist, '<', $M_C_WHITELIST
    or die "Cannot open whitelist '$M_C_WHITELIST': $!\n";
my @A_WHITELIST = <$fh_whitelist>;
close $fh_whitelist;

# A missing blacklist is reported but does not stop the crawl; the list is
# simply empty in that case.
my @A_BLACKLIST;
if (open my $fh_blacklist, '<', $M_C_BLACKLIST) {
    @A_BLACKLIST = <$fh_blacklist>;
    close $fh_blacklist;
}
else {
    warn "Cannot open blacklist '$M_C_BLACKLIST': $!\n";
}

foreach my $link (@A_WHITELIST) {
    chomp $link;
    next if $link eq '';    # ignore blank lines instead of crawling an empty URL
    read_link($link, $M_C_DEEP) unless (in_blacklist($link));
}
# Recursively crawl outward from one page.
#
# Every link found on $link that is not blacklisted is stored through
# read_url_to_db() and, while $deep is greater than zero, crawled in turn with
# the depth reduced by one.
#
# Parameters:
#   $link - URL of the page whose links are followed
#   $deep - remaining recursion depth; at 0 the links of $link are stored but
#           not descended into
# Returns: nothing.
sub read_link {
    my ($link, $deep) = @_;

    foreach my $url (fetch_links($link)) {
        next if in_blacklist($url);

        read_url_to_db($url, $deep);

        # Hand the reduced depth to the recursive call instead of decrementing
        # $deep in place: the depth budget belongs to the level, so every
        # sibling link must be stored and descended into with the same value.
        read_link($url, $deep - 1) if $deep > 0;
    }
    return;
}
# Fetch a page and return the absolute http/https URLs of all links on it.
#
# Parameters:
#   $link - URL of the page to fetch
# Returns: list of absolute URL strings; empty when the request failed or the
#          page has no usable links.
sub fetch_links {
    my $link = shift;

    # onerror => undef keeps Mechanize from dying or printing on failures.
    my $mech = WWW::Mechanize->new(onerror => undef);
    $mech->get($link);

    # A failed request has no page worth following links from.
    return unless $mech->success;

    my @links;
    foreach my $found ($mech->links()) {
        # url_abs resolves relative references against the page's base URL,
        # which plain string concatenation with $link cannot do correctly
        # (e.g. "/path", "../x" or a page URL that ends in a file name).
        my $absolute = $found->url_abs;
        next unless defined $absolute;

        my $url = "$absolute";

        # Only web pages can be crawled; skip mailto:, javascript:, ftp: etc.
        next unless $url =~ m{\Ahttps?://}i;

        push @links, $url;
    }
    return @links;
}
# Check a URL against the blacklist.
#
# Each entry of the file-level @A_BLACKLIST is used as a regular expression
# and matched against the URL.
#
# Parameters:
#   $link - URL to test
# Returns: 1 if any blacklist entry matches, 0 otherwise.
sub in_blacklist {
    my $link = shift;

    foreach my $entry (@A_BLACKLIST) {
        # Work on a copy so the shared list is not modified, and drop the
        # line ending left over from reading the file.
        (my $pattern = $entry) =~ s/[\r\n]+\z//;

        # A blank line must never be used as a pattern: an empty regex makes
        # Perl reuse the last successfully matched pattern or match anything.
        next if $pattern eq '';

        return 1 if $link =~ /$pattern/;
    }
    return 0;
}
# Store a URL in the "mose" table unless it is already there.
#
# For a link that is not yet stored, the page is fetched to read its title and
# a row with title, link, depth and the current time is inserted. Links that
# are already stored are left untouched.
# TODO: refreshing rows of already known links is not implemented.
#
# Parameters:
#   $link - URL to store
#   $deep - crawl depth value recorded with the link
# Returns: nothing. Dies if the database connection cannot be established.
sub read_url_to_db {
    my ($link, $deep) = @_;
    my $timestamp = time();

    # Connection settings come from config.global.pl rather than being
    # repeated here.
    my $dbh = DBI->connect("DBI:mysql:database=1_mose;host=$M_DB_HOST", $M_DB_USER, $M_DB_PASS)
        or die "Could not connect to database: $DBI::errstr";

    # Placeholders keep quotes and other special characters in crawled URLs
    # and titles from breaking or altering the SQL statements.
    my ($known) = $dbh->selectrow_array(
        'SELECT count(*) FROM mose WHERE link = ?', undef, $link);

    if (!$known) {
        # Fetch the page only when a new row is needed for it.
        my $mech = WWW::Mechanize->new(onerror => undef);
        $mech->get($link);
        my $title = $mech->title();
        $title = '' unless defined $title;    # pages without a title

        print "'$title', '$link', '$deep', '$timestamp'\n";
        $dbh->do(
            'insert into mose (title, link, deep, timestamp) values (?, ?, ?, ?)',
            undef, $title, $link, $deep, $timestamp);
    }

    $dbh->disconnect();
    return;
}

View File

@@ -0,0 +1,42 @@
#!/usr/bin/perl -w
# Start page of the search front end: prints the logo and the search form
# that posts to /cgi-bin/search.pl.
use strict;
use warnings;
use CGI;
use CGI::Carp qw(fatalsToBrowser);

# Settings assigned by config.search.pl.
our ($M_BASE_URL, $M_HOME_LOGO, $M_HOME_LOGO_TARGET, $M_HOME_LOGO_WIDTH, $M_HOME_LOGO_HEIGHT);

# The explicit "./" is required: since Perl 5.26 the current directory is no
# longer part of @INC, so a bare file name would not be found.
require "./config.search.pl";

my $cgi = CGI->new;
print $cgi->header();
print <<START;
<html>
<head>
</head>
<body>
START
print <<BODY;
<div align="center">
<p>
<a href="$M_HOME_LOGO_TARGET"><img width="$M_HOME_LOGO_WIDTH" height="$M_HOME_LOGO_HEIGHT" src="$M_BASE_URL/$M_HOME_LOGO"></a><br>
</p>
<p>
<form action="/cgi-bin/search.pl" method="post">
<input type="hidden" name="mose" value="mose">
<input type="input" size="50" name="search"><br>
<input type="submit" value="Suche">
</form>
</p>
</div>
<div style="position: relative">
<p style="position: fixed; bottom: 0; width:100%; text-align: center">
<font size="-2">copyright by andre geissler 2013</font>
</p>
</div>
BODY
print <<FOOTER;
</body>
</html>
FOOTER

View File

@@ -0,0 +1,30 @@
#!/usr/bin/perl -w
# Target of the search form. It currently only prints a placeholder page and
# does not read the submitted form fields.
use strict;
use warnings;
use CGI;
use CGI::Carp qw(fatalsToBrowser);

# Direct method call instead of the ambiguous indirect form "new CGI".
my $cgi = CGI->new;
print $cgi->header();
print <<START;
<html>
<head>
</head>
<body>
START
print <<BODY;
...
BODY
print <<FOOTER;
</body>
</html>
FOOTER

View File

@@ -0,0 +1 @@
http://www.bimminger.at

View File

@@ -0,0 +1,15 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">
<html>
<head>
<!-- Entry page: shows the CGI start page /cgi-bin/index.pl inside a single frame. -->
<title>MOSE</title>
</head>
<!-- The frameset holds exactly one frame, named MOSE. -->
<frameset>
<frame src="/cgi-bin/index.pl" name="MOSE">
<!-- Fallback content for browsers that do not support frames. -->
<noframes>
<body>
<h1>MOSE meldet</h1>
<p>Der verwendete Browser ist zu alt. Ein bisschen neuer darf es dann schon sein.</p>
</body>
</noframes>
</frameset>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB