#! /usr/bin/perl # Name : parse_known_sites # Author : theocrite # Description : parses (gets author, date & title) of # pages grabbed by alexandrie # use strict; use LWP::Simple; use DBI; my $dsn = 'DBI:mysql:drupal6:172.16.0.7'; my $db_user_name = 'drupal'; my $db_password = 'xxxxxxx'; sub parse_page { my $url=shift; my $id =shift; print "parsing $url\n"; my $title_patern = ''; my $author_patern = ''; my $date_patern = ''; my $site = ''; if ($url =~ /pcinpact\.com/) { $author_patern = ''; $title_patern = '
par (.*)'; $title_patern = '
(.*)
'; $site = 'Silicon.fr'; } elsif ($url =~ /cio-online\.com/) { $author_patern = 'par (.*)'; $title_patern = '