/*
 * Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
 *
 * This file is part of Logar, simple tool to manage http log files.
 *
 * Logar is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * Logar is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with Logar. If not, see <http://www.gnu.org/licenses/>.
 */
|
2021-04-22 18:11:27 +02:00
|
|
|
package fr.devinsy.logar.app.anonymizer;
|
2021-04-22 02:36:11 +02:00
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.FileOutputStream;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.PrintWriter;
|
2021-04-24 16:48:30 +02:00
|
|
|
import java.net.URLEncoder;
|
|
|
|
import java.nio.charset.StandardCharsets;
|
2021-04-22 02:36:11 +02:00
|
|
|
import java.time.format.DateTimeParseException;
|
2021-04-22 18:32:56 +02:00
|
|
|
import java.util.regex.Matcher;
|
2021-04-22 18:11:27 +02:00
|
|
|
import java.util.regex.Pattern;
|
2021-04-22 02:36:11 +02:00
|
|
|
import java.util.zip.GZIPOutputStream;
|
|
|
|
|
|
|
|
import org.apache.commons.io.IOUtils;
|
2021-04-22 18:11:27 +02:00
|
|
|
import org.apache.commons.lang3.StringUtils;
|
2021-04-22 02:36:11 +02:00
|
|
|
import org.april.logar.util.LineIterator;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
import fr.devinsy.logar.app.log.Log;
|
2021-04-24 16:48:30 +02:00
|
|
|
import fr.devinsy.logar.app.log.LogParser;
|
2021-04-22 02:36:11 +02:00
|
|
|
|
|
|
|
/**
 * The Class Anonymizer.
 *
 * @author cpm
 */
|
2021-04-22 02:36:11 +02:00
|
|
|
public final class Anonymizer
|
|
|
|
{
|
|
|
|
private static Logger logger = LoggerFactory.getLogger(Anonymizer.class);
|
|
|
|
|
2021-04-22 18:11:27 +02:00
|
|
|
public static final Pattern IPV4_PATTERN = Pattern.compile("\\d{0,3}\\.\\d{0,3}\\.\\d{0,3}\\.\\d{0,3}");
|
|
|
|
public static final Pattern IPV6_PATTERN = Pattern.compile("([0-9a-f]{1,4}:{1,2}){4,7}([0-9a-f]){1,4}", Pattern.CASE_INSENSITIVE);
|
|
|
|
|
2021-04-22 02:36:11 +02:00
|
|
|
private AnonMap map;
|
|
|
|
|
|
|
|
/**
 * Creates an anonymizer backed by a new, freshly constructed
 * anonymization map.
 */
public Anonymizer()
{
	map = new AnonMap();
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Anonymize.
|
|
|
|
*
|
|
|
|
* @param source
|
|
|
|
* the source
|
|
|
|
* @param target
|
|
|
|
* the target
|
|
|
|
*/
|
2021-04-22 18:32:56 +02:00
|
|
|
public void anonymize(final File source)
|
2021-04-22 02:36:11 +02:00
|
|
|
{
|
|
|
|
if (source == null)
|
|
|
|
{
|
|
|
|
throw new IllegalArgumentException("Null parameter.");
|
|
|
|
}
|
|
|
|
else if (!source.isFile())
|
|
|
|
{
|
|
|
|
throw new IllegalArgumentException("Parameter is not a file.");
|
|
|
|
}
|
2021-04-22 18:11:27 +02:00
|
|
|
else if (!StringUtils.containsAny(source.getName(), "access", "error"))
|
|
|
|
{
|
|
|
|
throw new IllegalArgumentException("File name does not contain 'access' or 'error'.");
|
|
|
|
}
|
2021-04-22 02:36:11 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
System.out.println("== Anonymize log for [" + source.getName() + "]");
|
|
|
|
|
2021-04-22 18:11:27 +02:00
|
|
|
boolean isAccessFile;
|
|
|
|
if (source.getName().contains("access"))
|
|
|
|
{
|
|
|
|
isAccessFile = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
isAccessFile = false;
|
|
|
|
}
|
|
|
|
|
2021-04-22 02:36:11 +02:00
|
|
|
File target;
|
|
|
|
if (source.getName().endsWith(".log.gz"))
|
|
|
|
{
|
|
|
|
target = new File(source.getParentFile(), source.getName().replace(".log.gz", "-anon.log.gz"));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
target = new File(source.getParentFile(), source.getName().replace(".log", "-anon.log"));
|
|
|
|
}
|
|
|
|
|
|
|
|
PrintWriter out = null;
|
|
|
|
try
|
|
|
|
{
|
|
|
|
LineIterator iterator = new LineIterator(source);
|
|
|
|
out = new PrintWriter(new GZIPOutputStream(new FileOutputStream(target)));
|
|
|
|
while (iterator.hasNext())
|
|
|
|
{
|
|
|
|
String line = iterator.next();
|
|
|
|
|
|
|
|
try
|
|
|
|
{
|
2021-04-22 18:11:27 +02:00
|
|
|
Log anon;
|
|
|
|
if (isAccessFile)
|
|
|
|
{
|
2021-04-24 16:48:30 +02:00
|
|
|
Log log = LogParser.parseAccessLog(line);
|
2021-04-22 18:11:27 +02:00
|
|
|
// logger.info("line={}", line);
|
|
|
|
// logger.info("log =[{}][{}][{}]", log.getIp(),
|
|
|
|
// log.getUser(), log.getDatetime());
|
2021-04-22 02:36:11 +02:00
|
|
|
|
2021-04-22 18:11:27 +02:00
|
|
|
anon = anonymizeAccess(log);
|
|
|
|
// logger.info("anon=[{}][{}][{}]", anon.getIp(),
|
|
|
|
// anon.getUser(), anon.getDatetime());
|
|
|
|
// logger.info("anon={}", anon);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-04-24 16:48:30 +02:00
|
|
|
Log log = LogParser.parseErrorLog(line);
|
2021-04-22 18:11:27 +02:00
|
|
|
|
|
|
|
anon = anonymizeError(log);
|
|
|
|
}
|
2021-04-22 02:36:11 +02:00
|
|
|
|
|
|
|
out.println(anon);
|
|
|
|
}
|
|
|
|
catch (IllegalArgumentException exception)
|
|
|
|
{
|
|
|
|
System.out.println("Bad format line: " + line);
|
|
|
|
exception.printStackTrace();
|
|
|
|
}
|
|
|
|
catch (DateTimeParseException exception)
|
|
|
|
{
|
|
|
|
System.out.println("Bad datetime format: " + line);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
catch (IOException exception)
|
|
|
|
{
|
|
|
|
System.err.println("Error with file [" + source.getAbsolutePath() + "]");
|
|
|
|
exception.printStackTrace();
|
|
|
|
}
|
|
|
|
finally
|
|
|
|
{
|
|
|
|
IOUtils.closeQuietly(out);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2021-04-22 18:32:56 +02:00
|
|
|
* Anonymize.
|
2021-04-22 02:36:11 +02:00
|
|
|
*
|
|
|
|
* @param log
|
|
|
|
* the log
|
|
|
|
* @return the log
|
|
|
|
*/
|
2021-04-22 18:32:56 +02:00
|
|
|
public Log anonymizeAccess(final Log log)
|
2021-04-22 02:36:11 +02:00
|
|
|
{
|
|
|
|
Log result;
|
|
|
|
|
2021-04-24 16:48:30 +02:00
|
|
|
result = new Log(log);
|
2021-04-22 02:36:11 +02:00
|
|
|
|
2021-04-24 16:48:30 +02:00
|
|
|
result.setIp(this.map.anonymizeIp(log.getIp()));
|
|
|
|
result.setUser(this.map.anonymizeUser(log.getUser()));
|
|
|
|
|
|
|
|
// Anonymize ip.
|
|
|
|
result.setRequest(result.getRequest().replace(result.getIp(), result.getIp()));
|
|
|
|
result.setReferer(result.getReferer().replace(result.getIp(), result.getIp()));
|
|
|
|
|
|
|
|
// Anonymize user.
|
2021-04-22 02:36:11 +02:00
|
|
|
if (!log.getUser().equals("-"))
|
|
|
|
{
|
2021-04-24 16:48:30 +02:00
|
|
|
// URLEncode replaces ' ' with '+' so bad for us.
|
|
|
|
String userInUrl = URLEncoder.encode(log.getUser(), StandardCharsets.UTF_8).replace("+", "%20");
|
|
|
|
|
|
|
|
result.setRequest(result.getRequest().replace(userInUrl, result.getUser()));
|
|
|
|
result.setReferer(result.getReferer().replace(userInUrl, result.getUser()));
|
2021-04-22 02:36:11 +02:00
|
|
|
}
|
|
|
|
|
2021-04-24 16:48:30 +02:00
|
|
|
result.concateAccessLog();
|
2021-04-22 02:36:11 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-04-22 18:32:56 +02:00
|
|
|
/**
|
|
|
|
* Anonymize error.
|
|
|
|
*
|
|
|
|
* @param log
|
|
|
|
* the log
|
|
|
|
* @return the log
|
|
|
|
*/
|
|
|
|
public Log anonymizeError(final Log log)
|
|
|
|
{
|
|
|
|
Log result;
|
|
|
|
|
|
|
|
// Search and anonymized Ipv4 addresses.
|
|
|
|
Matcher matcher = IPV4_PATTERN.matcher(log.getLine());
|
|
|
|
String anonLine = log.getLine();
|
|
|
|
while (matcher.find())
|
|
|
|
{
|
|
|
|
String left = anonLine.substring(0, matcher.start());
|
|
|
|
String ipv4 = matcher.group();
|
|
|
|
String right = anonLine.substring(matcher.end());
|
|
|
|
|
|
|
|
String anonIpv4 = this.map.get(ipv4);
|
|
|
|
anonLine = left + anonIpv4 + right;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search and anonymized Ipv4 addresses.
|
|
|
|
matcher = IPV6_PATTERN.matcher(anonLine);
|
|
|
|
while (matcher.find())
|
|
|
|
{
|
|
|
|
String left = anonLine.substring(0, matcher.start());
|
|
|
|
String ipv6 = matcher.group();
|
|
|
|
String right = anonLine.substring(matcher.end());
|
|
|
|
|
|
|
|
String anonIpv6 = this.map.get(ipv6);
|
|
|
|
anonLine = left + anonIpv6 + right;
|
|
|
|
}
|
|
|
|
|
|
|
|
result = new Log(anonLine, log.getDatetime());
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-04-22 02:36:11 +02:00
|
|
|
/**
|
|
|
|
* Gets the map table.
|
|
|
|
*
|
|
|
|
* @return the map table
|
|
|
|
*/
|
|
|
|
public AnonMap getMapTable()
|
|
|
|
{
|
|
|
|
AnonMap result;
|
|
|
|
|
|
|
|
result = this.map;
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Inits the map.
|
|
|
|
*
|
|
|
|
* @param source
|
|
|
|
* the source
|
|
|
|
*/
|
|
|
|
public void loadMapTable(final File source)
|
|
|
|
{
|
|
|
|
if (source != null)
|
|
|
|
{
|
|
|
|
this.map.addAll(AnonMapFile.load(source));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Save map table.
|
|
|
|
*
|
|
|
|
* @param target
|
|
|
|
* the target
|
|
|
|
*/
|
|
|
|
public void SaveMapTable(final File target)
|
|
|
|
{
|
|
|
|
if (target != null)
|
|
|
|
{
|
|
|
|
AnonMapFile.save(target, this.map);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|