Example
Web Log Analysis – Count 404 Errors
Input Sample (webserver.log)
127.0.0.1 - - [23/Jul/2024:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1024
127.0.0.1 - - [23/Jul/2024:10:01:00 +0000] "GET /notfound.html HTTP/1.1" 404 512
127.0.0.1 - - [23/Jul/2024:10:02:00 +0000] "GET /page.html HTTP/1.1" 404 0
Solution 1: Using Spring libraries
Sample Log File (webserver.log)
127.0.0.1 - - [23/Jul/2024:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1024
127.0.0.1 - - [23/Jul/2024:10:01:00 +0000] "GET /notfound.html HTTP/1.1" 404 512
127.0.0.1 - - [23/Jul/2024:10:02:00 +0000] "GET /page.html HTTP/1.1" 404 0
Java Code: Simulating MapReduce
package com.example.logprocessor;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@SpringBootApplication
public class WebLog404CounterApp implements CommandLineRunner {
/**
 * Application entry point: boots the application by calling {@code SpringApplication.run}
 * with this class and the command-line arguments.
 *
 * @param args command-line arguments, forwarded unchanged
 */
public static void main(String[] args) {
    final Class<?> applicationClass = WebLog404CounterApp.class;
    SpringApplication.run(applicationClass, args);
}
/**
 * Counts the HTTP 404 entries in the {@code webserver.log} classpath resource and prints
 * the total to standard output as {@code "Total 404 Errors: <n>"}.
 *
 * @param args command-line arguments; not used by this method
 * @throws IllegalStateException if {@code webserver.log} is not found on the classpath
 * @throws Exception if the log cannot be read or the reader cannot be closed
 */
@Override
public void run(String... args) throws Exception {
    InputStream logStream = getClass().getClassLoader().getResourceAsStream("webserver.log");
    if (logStream == null) {
        // getResourceAsStream signals a missing resource with null; fail with a clear message
        // instead of an anonymous NullPointerException from the reader constructor.
        throw new IllegalStateException("Resource webserver.log not found on the classpath");
    }
    // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
    try (BufferedReader reader =
            new BufferedReader(new InputStreamReader(logStream, StandardCharsets.UTF_8))) {
        // "Map" = classify each line, "reduce" = count the matches. A plain sequential
        // filter/count needs no shared mutable counter.
        long count404 = reader.lines().filter(WebLog404CounterApp::isNotFound).count();
        System.out.println("Total 404 Errors: " + count404);
    }
}

/**
 * Tells whether a log line records a 404 response.
 *
 * <p>The line is split on single spaces; in the sample log format the status code is the
 * ninth token (index 8). Lines with fewer than nine tokens never match.
 *
 * @param line one line of the web server log
 * @return {@code true} if the ninth token is exactly {@code "404"}
 */
private static boolean isNotFound(String line) {
    String[] tokens = line.split(" ");
    return tokens.length > 8 && "404".equals(tokens[8]);
}
}
Output
Sales Aggregation – Total Sales per Product
Input Sample (sales.txt)
Solution 1: Using Hadoop
1. Mapper Class
2. Reducer Class
3. Driver Class
Running the MapReduce Job
Output
Last updated