Skip to content

Commit f65371d

Browse files
Merge pull request #870 from igiguere/NUTCH-2971
Fix for NUTCH-2971: Unit tests fail with JDK 17
2 parents 1156801 + f43ff78 commit f65371d

File tree

8 files changed

+483
-151
lines changed

8 files changed

+483
-151
lines changed

build.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@
8686
<ivy:dependencytree />
8787
</target>
8888

89+
<target name="dependencytests" depends="resolve-test" description="Show unit tests dependency tree">
90+
<ivy:dependencytree />
91+
</target>
92+
8993
<!-- ====================================================== -->
9094
<!-- Stuff needed by all targets -->
9195
<!-- ====================================================== -->

ivy/ivy.xml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,6 @@
122122
<!-- Required for JUnit 5 (Jupiter) test execution -->
123123
<dependency org="org.junit.jupiter" name="junit-jupiter-engine" rev="5.13.4" conf="test->default"/>
124124
<dependency org="org.junit.jupiter" name="junit-jupiter-api" rev="5.13.4" conf="test->default"/>
125-
<dependency org="org.apache.mrunit" name="mrunit" rev="1.1.0" conf="test->default">
126-
<artifact name="mrunit" ns0:classifier="hadoop2" />
127-
<exclude org="log4j" module="log4j" />
128-
<exclude org="junit" module="junit" />
129-
<exclude org="org.powermock" module="powermock-module-junit4" />
130-
<exclude org="com.google.guava" name="guava" />
131-
</dependency>
132125

133126
<!-- Jetty used to serve test pages for unit tests, but is also provided as dependency of Hadoop -->
134127
<dependency org="org.eclipse.jetty" name="jetty-server" rev="10.0.25" conf="test->default">

src/plugin/build-plugin.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,4 +282,7 @@
282282
<target name="dependencytree" depends="resolve-default" description="Show dependency tree">
283283
<ivy:dependencytree />
284284
</target>
285+
<target name="dependencytests" depends="resolve-test" description="Show unit tests dependency tree">
286+
<ivy:dependencytree />
287+
</target>
285288
</project>

src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttpByProxy.java

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.nutch.protocol.Content;
2323
import org.apache.nutch.protocol.ProtocolOutput;
2424
import org.apache.nutch.protocol.ProtocolStatus;
25+
import org.junit.jupiter.api.AfterEach;
2526
import org.junit.jupiter.api.BeforeEach;
2627
import org.junit.jupiter.api.Test;
2728
import org.littleshoot.proxy.HttpProxyServer;
@@ -30,26 +31,43 @@
3031
import static org.junit.jupiter.api.Assertions.assertNotNull;
3132
import static org.junit.jupiter.api.Assertions.assertTrue;
3233

34+
import java.io.IOException;
35+
import java.net.ServerSocket;
36+
3337
/**
3438
* Test cases for protocol-http by proxy
3539
*/
3640
public class TestProtocolHttpByProxy extends AbstractHttpProtocolPluginTest {
3741

3842
public static final String PROXY_HOST = "localhost";
39-
public static final Integer PROXY_PORT = 8888;
43+
public Integer proxyPort = 8888;
4044

4145
public static final String TARGET_HOST = "www.baidu.com";
4246
public static final Integer TARGET_PORT = 443;
47+
48+
private HttpProxyServer server;
4349

4450
@BeforeEach
4551
public void setUp() throws Exception {
4652
super.setUp();
53+
proxyPort = findOpenPort();
4754
conf.set("http.proxy.host", PROXY_HOST);
48-
conf.set("http.proxy.port", PROXY_PORT.toString());
55+
conf.set("http.proxy.port", proxyPort.toString());
4956
http.setConf(conf);
5057

51-
HttpProxyServer server = DefaultHttpProxyServer.bootstrap()
52-
.withPort(PROXY_PORT).start();
58+
server = DefaultHttpProxyServer.bootstrap()
59+
.withPort(proxyPort).start();
60+
}
61+
62+
private Integer findOpenPort() throws IOException {
63+
try (ServerSocket socket = new ServerSocket(0)) {
64+
return socket.getLocalPort();
65+
}
66+
}
67+
68+
@AfterEach
69+
public void tearDown() {
70+
server.stop();
5371
}
5472

5573
@Override

src/test/org/apache/nutch/crawl/CrawlDbUpdateTestDriver.java

Lines changed: 0 additions & 120 deletions
This file was deleted.

src/test/org/apache/nutch/crawl/TestCrawlDbStates.java

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.hadoop.util.StringUtils;
2424
import org.apache.nutch.scoring.ScoringFilterException;
2525
import org.apache.nutch.scoring.ScoringFilters;
26+
import org.apache.nutch.util.ReducerContextWrapper;
2627
import org.junit.jupiter.api.Test;
2728
import org.slf4j.Logger;
2829
import org.slf4j.LoggerFactory;
@@ -31,8 +32,10 @@
3132
import java.lang.invoke.MethodHandles;
3233
import java.util.ArrayList;
3334
import java.util.Date;
35+
import java.util.HashMap;
3436
import java.util.Iterator;
3537
import java.util.List;
38+
import java.util.Map;
3639

3740
import static org.apache.nutch.crawl.CrawlDatum.*;
3841
import static org.junit.jupiter.api.Assertions.fail;
@@ -196,13 +199,13 @@ public void testCrawlDbStateTransitionMatrix() {
196199
* already in CrawlDb. Newly injected elements have status "db_unfetched".
197200
* Inject is simulated by calling {@link Injector.InjectReducer#reduce()}.
198201
*/
202+
@SuppressWarnings({ "unchecked", "rawtypes" })
199203
@Test
200204
public void testCrawlDbStatTransitionInject() {
201205
LOG.info("Test CrawlDatum states in Injector after inject");
202206
Configuration conf = CrawlDBTestUtil.createContext().getConfiguration();
203207
Injector.InjectReducer injector = new Injector.InjectReducer();
204-
CrawlDbUpdateTestDriver<Injector.InjectReducer> injectDriver =
205-
new CrawlDbUpdateTestDriver<Injector.InjectReducer>(injector, conf);
208+
206209
ScoringFilters scfilters = new ScoringFilters(conf);
207210
for (String sched : schedules) {
208211
LOG.info("Testing inject with {}", sched);
@@ -234,12 +237,29 @@ public void testCrawlDbStatTransitionInject() {
234237
LOG.error(StringUtils.stringifyException(e));
235238
}
236239
values.add(injected);
237-
List<CrawlDatum> res = injectDriver.update(values);
238-
if (res.size() != 1) {
240+
241+
List<CrawlDatum> result = new ArrayList<CrawlDatum>();
242+
Map<Text, CrawlDatum> res = new HashMap<>();
243+
ReducerContextWrapper contextWrapper = new ReducerContextWrapper(injector, conf, res);
244+
try {
245+
injector.setup(contextWrapper.getContext());
246+
// test
247+
injector.reduce(CrawlDbUpdateUtil.dummyURL, values, contextWrapper.getContext());
248+
249+
for (Map.Entry<Text, CrawlDatum> e : res.entrySet()) {
250+
if (e.getKey().equals(CrawlDbUpdateUtil.dummyURL)) {
251+
result.add(e.getValue());
252+
}
253+
}
254+
} catch (IOException | InterruptedException e) {
255+
LOG.error(StringUtils.stringifyException(e));
256+
}
257+
258+
if (result.size() != 1) {
239259
fail("Inject didn't result in one single CrawlDatum per URL");
240260
continue;
241261
}
242-
byte status = res.get(0).getStatus();
262+
byte status = result.get(0).getStatus();
243263
if (status != toDbStatus) {
244264
fail("Inject for "
245265
+ (fromDbStatus == -1 ? "" : getStatusName(fromDbStatus)

src/test/org/apache/nutch/indexer/TestIndexerMapReduce.java

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
import org.apache.commons.codec.binary.Base64;
2020
import org.apache.hadoop.conf.Configuration;
2121
import org.apache.hadoop.io.Text;
22-
import org.apache.hadoop.mapreduce.Reducer;
23-
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
24-
import org.apache.hadoop.mrunit.types.Pair;
2522
import org.apache.hadoop.util.StringUtils;
2623
import org.apache.nutch.crawl.CrawlDatum;
2724
import org.apache.nutch.crawl.NutchWritable;
@@ -33,6 +30,7 @@
3330
import org.apache.nutch.parse.ParseText;
3431
import org.apache.nutch.protocol.Content;
3532
import org.apache.nutch.util.NutchConfiguration;
33+
import org.apache.nutch.util.ReducerContextWrapper;
3634
import org.junit.jupiter.api.Test;
3735
import org.slf4j.Logger;
3836
import org.slf4j.LoggerFactory;
@@ -42,7 +40,9 @@
4240
import java.nio.charset.Charset;
4341
import java.nio.charset.StandardCharsets;
4442
import java.util.ArrayList;
43+
import java.util.HashMap;
4544
import java.util.List;
45+
import java.util.Map;
4646

4747
import static org.junit.jupiter.api.Assertions.assertEquals;
4848
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -89,8 +89,8 @@ public class TestIndexerMapReduce {
8989
public static CrawlDatum crawlDatumFetchSuccess = new CrawlDatum(
9090
CrawlDatum.STATUS_FETCH_SUCCESS, 60 * 60 * 24);
9191

92-
private Reducer<Text, NutchWritable, Text, NutchIndexAction> reducer = new IndexerMapReduce.IndexerReducer();
93-
private ReduceDriver<Text, NutchWritable, Text, NutchIndexAction> reduceDriver;
92+
private IndexerMapReduce.IndexerReducer reducer = new IndexerMapReduce.IndexerReducer();
93+
9494
private Configuration configuration;
9595

9696

@@ -101,6 +101,9 @@ public class TestIndexerMapReduce {
101101
public void testBinaryContentBase64() {
102102
configuration = NutchConfiguration.create();
103103
configuration.setBoolean(IndexerMapReduce.INDEXER_BINARY_AS_BASE64, true);
104+
105+
// Work around an unrelated issue with "index.jexl.filter": do not use all plugins. Ref: src/test/nutch-site.xml
106+
configuration.set("plugin.includes", "protocol-http|urlfilter-regex|parse-(html|tika)|index-(basic|anchor)|indexer-csv|scoring-opic|urlnormalizer-(pass|regex|basic)");
104107

105108
Charset[] testCharsets = { StandardCharsets.UTF_8,
106109
Charset.forName("iso-8859-1"), Charset.forName("iso-8859-2") };
@@ -155,7 +158,10 @@ public void testBinaryContentBase64() {
155158
* @param content
156159
* (optional, if index binary content) protocol content
157160
* @return &quot;indexed&quot; document
161+
* @throws InterruptedException
162+
* @throws IOException
158163
*/
164+
@SuppressWarnings({ "unchecked", "rawtypes" })
159165
public NutchDocument runIndexer(CrawlDatum dbDatum, CrawlDatum fetchDatum,
160166
ParseText parseText, ParseData parseData, Content content) {
161167
List<NutchWritable> values = new ArrayList<NutchWritable>();
@@ -164,19 +170,20 @@ public NutchDocument runIndexer(CrawlDatum dbDatum, CrawlDatum fetchDatum,
164170
values.add(new NutchWritable(parseText));
165171
values.add(new NutchWritable(parseData));
166172
values.add(new NutchWritable(content));
167-
reduceDriver = ReduceDriver.newReduceDriver(reducer);
168-
reduceDriver.getConfiguration().addResource(configuration);
169-
reduceDriver.withInput(testUrlText, values);
170-
List<Pair<Text, NutchIndexAction>> reduceResult;
173+
Map<Text, NutchIndexAction> reduceResult = new HashMap<>();
174+
ReducerContextWrapper contextWrapper = new ReducerContextWrapper(reducer, configuration, reduceResult);
171175
NutchDocument doc = null;
172-
try {
173-
reduceResult = reduceDriver.run();
174-
for (Pair<Text, NutchIndexAction> p : reduceResult) {
175-
if (p.getSecond().action != NutchIndexAction.DELETE) {
176-
doc = p.getSecond().doc;
176+
try {
177+
reducer.setup(contextWrapper.getContext());
178+
// test
179+
reducer.reduce(testUrlText, values, contextWrapper.getContext());
180+
181+
for (Map.Entry<Text, NutchIndexAction> e : reduceResult.entrySet()) {
182+
if (e.getValue().action != NutchIndexAction.DELETE) {
183+
doc = e.getValue().doc;
177184
}
178185
}
179-
} catch (IOException e) {
186+
} catch (IOException | InterruptedException e) {
180187
LOG.error(StringUtils.stringifyException(e));
181188
}
182189
return doc;

0 commit comments

Comments
 (0)