liupengjie - 2013-11-01

[hadoop@Master hadoop]$ bin/hadoop jar wikipedia-miner-hadoop.jar org.wikipedia.miner.extraction.DumpExtractor input/enwiki-20130503-pages-articles.xml input/languages.xml en input/en-sent.bin output
13/11/01 15:20:37 INFO extraction.DumpExtractor: Extracting site info
13/11/01 15:20:37 INFO extraction.DumpExtractor: Starting page step
13/11/01 15:20:37 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/11/01 15:20:37 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
13/11/01 15:20:37 INFO mapred.FileInputFormat: Total input paths to process : 1
13/11/01 15:20:38 INFO mapred.JobClient: Running job: job_201311011519_0001
13/11/01 15:20:39 INFO mapred.JobClient: map 0% reduce 0%
13/11/01 15:20:48 INFO mapred.JobClient: Task Id : attempt_201311011519_0001_m_000654_0, Status : FAILED
java.io.IOException: Task process exit with nonzero status of 1.
	at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:418)

13/11/01 15:20:48 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000654_0&filter=stdout
13/11/01 15:20:48 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000654_0&filter=stderr
13/11/01 15:20:54 INFO mapred.JobClient: Task Id : attempt_201311011519_0001_m_000654_1, Status : FAILED
java.io.IOException: Task process exit with nonzero status of 1.
	at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:418)

13/11/01 15:20:54 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000654_1&filter=stdout
13/11/01 15:20:54 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000654_1&filter=stderr
13/11/01 15:21:00 INFO mapred.JobClient: Task Id : attempt_201311011519_0001_m_000654_2, Status : FAILED
java.io.IOException: Task process exit with nonzero status of 1.
	at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:418)

13/11/01 15:21:00 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000654_2&filter=stdout
13/11/01 15:21:00 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000654_2&filter=stderr
13/11/01 15:21:12 INFO mapred.JobClient: Task Id : attempt_201311011519_0001_m_000653_0, Status : FAILED
java.io.IOException: Task process exit with nonzero status of 1.
	at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:418)

13/11/01 15:21:12 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000653_0&filter=stdout
13/11/01 15:21:12 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000653_0&filter=stderr
13/11/01 15:21:17 INFO mapred.JobClient: Task Id : attempt_201311011519_0001_m_000653_1, Status : FAILED
java.io.IOException: Task process exit with nonzero status of 1.
	at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:418)

13/11/01 15:21:17 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000653_1&filter=stdout
13/11/01 15:21:17 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000653_1&filter=stderr
13/11/01 15:21:23 INFO mapred.JobClient: Task Id : attempt_201311011519_0001_m_000653_2, Status : FAILED
java.io.IOException: Task process exit with nonzero status of 1.
	at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:418)

13/11/01 15:21:23 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000653_2&filter=stdout
13/11/01 15:21:23 WARN mapred.JobClient: Error reading task outputhttp://Master.Hadoop:50060/tasklog?plaintext=true&taskid=attempt_201311011519_0001_m_000653_2&filter=stderr
13/11/01 15:21:29 INFO mapred.JobClient: Job complete: job_201311011519_0001
13/11/01 15:21:29 INFO mapred.JobClient: Counters: 0
Exception in thread "main" java.io.IOException: Job failed!
	at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252)
	at org.wikipedia.miner.extraction.PageStep.run(Unknown Source)
	at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
	at org.wikipedia.miner.extraction.DumpExtractor.run(Unknown Source)
	at org.wikipedia.miner.extraction.DumpExtractor.main(Unknown Source)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.apache.hadoop.util.RunJar.main(RunJar.java:156)