#! /usr/bin/perl
# Name        : parse_known_sites
# Author      : theocrite
# Description : Parses pages grabbed by alexandrie
#               (gets the author, date & title of each page).
#
use strict;
use warnings;
use LWP::Simple;
use DBI;
use Config::Simple;

# CONFIGURATION FILE
# Load the settings file. The constructor returns a false value on failure;
# Config::Simple->error() then describes the cause (missing file, unreadable
# file, syntax error, ...), so it is appended to the fatal message instead of
# always blaming a missing file.
my $alexandrieConf = Config::Simple->new("/etc/alexandrie/alexandrie.conf")
    or die "Missing configuration file. " . (Config::Simple->error() || '');
print "Configuration file loaded.\n";

# DATABASE
# Connection settings, read from the "database.*" keys of the configuration.
my $dsn          = $alexandrieConf->param("database.dsn");
my $db_user_name = $alexandrieConf->param("database.username");
my $db_password  = $alexandrieConf->param("database.password");

sub parse_page {
    my $url=shift;
    my $id =shift;
    print "parsing $url\n";
    my $title_patern = '';
    my $author_patern = '';
    my $date_patern = '';
    my $site = '';
    if ($url =~ /pcinpact\.com/) {
        $author_patern = '';
        $title_patern = '
par (.*)'; $title_patern = '
(.*)
'; $site = 'Silicon.fr'; } elsif ($url =~ /cio-online\.com/) { $author_patern = 'par (.*)'; $title_patern = '