From: <sta...@us...> - 2007-03-21 19:14:49
|
Revision: 1623 http://archive-access.svn.sourceforge.net/archive-access/?rev=1623&view=rev Author: stack-sf Date: 2007-03-21 12:13:23 -0700 (Wed, 21 Mar 2007) Log Message: ----------- M nutchwax/src/main/assembly/distribution.xml Change where we fetch job and war jars from. M nutchwax/nutchwax-thirdparty/pom.xml D nutchwax/build.xml Removed build.xml. Remove misimpression that you can use ant to build this project. Call third.party build.xml directly instead. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/nutchwax-thirdparty/pom.xml trunk/archive-access/projects/nutchwax/src/main/assembly/distribution.xml Removed Paths: ------------- trunk/archive-access/projects/nutchwax/build.xml Deleted: trunk/archive-access/projects/nutchwax/build.xml =================================================================== --- trunk/archive-access/projects/nutchwax/build.xml 2007-03-21 17:06:07 UTC (rev 1622) +++ trunk/archive-access/projects/nutchwax/build.xml 2007-03-21 19:13:23 UTC (rev 1623) @@ -1,342 +0,0 @@ -<?xml version="1.0"?> - -<project name="nutchwax" default="all"> - <property name="name" value="${ant.project.name}"/> - <property name="root" value="${basedir}"/> - - <!--'nutch.root' is pointer at core nutch. Expect to find it in - '${basedir}/third-party' named 'nutch'. 
- --> - <!--Keep this aligned with whats in maven2 pom--> - <property name="nutchwax.version" value="-0.11.0-SNAPSHOT"/> - <property name="nutch.root" location="${root}/third-party/nutch"/> - - <property file="${user.home}/.$(name}.build.properties" /> - - <property name="src.dir" location="${root}/src/java"/> - <property name="src.test" location="${root}/src/test"/> - - <available file="${src.test}" type="dir" property="test.available"/> - - <property name="conf.dir" location="${root}/conf"/> - - <property name="build.dir" location="${root}/target"/> - <property name="build.classes" location="${build.dir}/classes"/> - <property name="build.test" location="${build.dir}/test"/> - - <property name="build.plugins" location="${nutch.root}/build/plugins"/> - <property name="deploy.dir" location="${build.plugins}/${name}"/> - - <property name="this.web" location="${root}/src/web"/> - <property name="nutch.web" location="${nutch.root}/src/web"/> - - <property name="javac.deprecation" value="off"/> - <property name="javac.debug" value="on"/> - - <property name="javadoc.link" - value="http://java.sun.com/j2se/1.5/docs/api/"/> - - <property name="build.encoding" value="ISO-8859-1"/> - - <!-- the normal classpath --> - <path id="classpath"> - <pathelement location="${build.classes}"/> - <pathelement location="${nutch.root}/build/classes"/> - <fileset dir="${nutch.root}/lib"> - <include name="*.jar" /> - </fileset> - </path> - - <!-- the unit test classpath --> - <path id="test.classpath"> - <pathelement location="${build.test}" /> - <pathelement location="${conf.dir}"/> - <pathelement location="${nutch.root}/conf"/> - <pathelement location="${nutch.root}/build"/> - <path refid="classpath"/> - </path> - - <target name="third.party.plugins" description="Build third-party plugins"> - <echo message="Building nutch third-party dependency (plugins)" /> - <ant dir="third-party/nutch" target="compile-plugins" inheritAll="false" > - <property name="build.compiler" value="extJavac" /> 
- </ant> - </target> - <target name="third.party.compile" description="Compile third-party src"> - <echo message="Building nutch third-party dependency (compile)" /> - <ant dir="third-party/nutch" target="compile" inheritAll="false" > - <property name="build.compiler" value="extJavac" /> - </ant> - </target> - <target name="third.party.jar" description="Build third-party jars" > - <echo message="Building nutch third-party dependency (jar)" /> - <ant dir="third-party/nutch" target="jar" inheritAll="false" > - <property name="build.compiler" value="extJavac" /> - </ant> - </target> - <target name="third.party.war" description="Build third-party wars" > - <echo message="Building nutch third-party dependency (war)" /> - <ant dir="third-party/nutch" target="war" inheritAll="false" > - <property name="build.compiler" value="extJavac" /> - </ant> - </target> - <target name="third.party.clean" description="Clean third-party software"> - <echo message="Cleaning nutch third-party dependency" /> - <ant dir="third-party/nutch" target="clean" inheritAll="false" > - <property name="build.compiler" value="extJavac" /> - </ant> - </target> - - <!-- ====================================================== --> - <!-- Stuff needed by all targets --> - <!-- ====================================================== --> - <target name="init"> - <mkdir dir="${build.dir}"/> - <mkdir dir="${build.classes}"/> - <mkdir dir="${build.test}"/> - </target> - - <!-- ====================================================== --> - <!-- Compile the Java files --> - <!-- ====================================================== --> - <target name="compile" depends="init" - description="Compile nutchwax classes"> - <property name="build.compiler" value="extJavac" /> - <javac - encoding="${build.encoding}" - srcdir="${src.dir}" - includes="**/*.java" - destdir="${build.classes}" - debug="${javac.debug}" - target="1.5" - source="1.5" - deprecation="${javac.deprecation}"> - <classpath refid="classpath"/> - </javac> 
- </target> - - <!-- ====================================================== --> - <!-- Compile plugins --> - <!-- ====================================================== --> - <target name="compile-plugins" - description="Compile all nutchwax plugins"> - <ant dir="src/plugin" target="deploy" inheritAll="false"> - <property name="build.compiler" value="extJavac" /> - </ant> - </target> - - <!-- ================================================================== --> - <!-- Make job jar --> - <!-- ================================================================== --> - <!-- --> - <!-- ================================================================== --> - <target name="jar" depends="compile, compile-plugins" - description="Builds nutchwax jobs jar of all tasks to do import, etc." > - <zip destfile="${build.dir}/${name}-job${nutchwax.version}.jar"> - <zipfileset prefix="META-INF" file="${conf.dir}/MANIFEST.MF"/> - <zipfileset file="${conf.dir}/log4j.properties"/> - <zipfileset file="${conf.dir}/wax-parse-plugins.xml"/> - <zipfileset file="${conf.dir}/wax-default.xml"/> - <zipfileset file="${conf.dir}/regex-normalize.xml"/> - <zipfileset file="${conf.dir}/regex-urlfilter.txt"/> - <zipfileset file="${nutch.root}/conf/mime-types.xml"/> - <zipfileset file="${nutch.root}/conf/nutch-default.xml"/> - <zipfileset file="${nutch.root}/conf/common-terms.utf8"/> - <zipfileset prefix="bin" file="${basedir}/src/plugin/parse-waxext/bin/parse-pdf.sh" filemode="555"/> - <!--<zipfileset refid="lib.jars"/> - --> - - <!--Include all class files both nutch and nutchwax at top level - so all needed to launch a job using the 'hadoop jar nutchwax.jobs' - is on the classpath (Only classes that are at top-level in a jar can - be found on CLASSPATH. Jars inside jars or classes under 'classes' - directory cannot be found or added to CLASSPATH, not without custom - classloader: See - http://java.sun.com/docs/books/tutorial/deployment/jar/downman.html). 
- --> - <zipfileset dir="${build.dir}/classes" /> - <zipfileset dir="${nutch.root}/build/classes" /> - <!-- Be selective about which plugins to copy over. Otherwise - the jar gets massive (16Megs with all plugins. 10Megs not - including plugins used at other than indexing time). - - Include query-time filters for case where we're running in - distributed mode. - - Note, we EXCLUDE parse-js. Otherwise, its run as part of - html parse. We don't want this because the parse-js currently - adds base url as anchor text polluting the linkdb and its kinda - messy regards URLs it finds in javascript. It needs some work. - Meantime, we'll do w/o the URLs it finds in linkdb. See - NUTCH-425 and - http://sourceforge.net/tracker/index.php?func=detail&aid=1591709&group_id=118427&atid=681137 - --> - <zipfileset prefix="plugins" dir="${nutch.root}/build/plugins"> - <!-- See above why we exclude parse-js--> - <exclude name="parse-js/**" /> - <include name="analysis-*/**" /> - <include name="index-*/**" /> - <include name="language-*/**" /> - <include name="lib-*/**" /> - <include name="nutch-*/**" /> - <include name="scoring-*/**" /> - <include name="query-*/**" /> - <include name="summary-*/**" /> - <include name="urlfilter-*/**" /> - <include name="urlnormalizer-*/**" /> - <include name="parse-*/**" /> - </zipfileset> - <!--Add wax plugins--> - <zipfileset prefix="wax-plugins" dir="${build.dir}/wax-plugins"> - <include name="*/**" /> - </zipfileset> - <!--Include nutch dependencies in job jar. 
--> - <zipfileset prefix="lib" file="${nutch.root}/lib/commons-lang*jar"/> - <zipfileset prefix="lib" file="${nutch.root}/lib/lucene*jar"/> - <zipfileset prefix="lib" file="${nutch.root}/lib/jakarta-oro*jar"/> - <zipfileset prefix="lib" file="${nutch.root}/lib/xerces*jar"/> - <zipfileset prefix="lib" file="${nutch.root}/lib//concurrent-1.3.4.jar"/> - <!--Finally, include the README.txt file so can tell what - hadoop and nutch this was built against--> - <zipfileset file="${root}/README.txt"/> - </zip> - </target> - - <!-- ================================================================== --> - <!-- Build all including third-party dependencies (i.e. nutch) --> - <!-- ================================================================== --> - <!-- --> - <!-- ================================================================== --> - <target name="all" depends="third.party.jar,third.party.war,jar,compile,war" /> - - <!-- ================================================================== --> - <!-- Compile test code --> - <!-- ================================================================== --> - <target name="compile-test" depends="compile" if="test.available"> - <javac - encoding="${build.encoding}" - srcdir="${src.test}" - includes="**/*.java" - destdir="${build.test}" - debug="${debug}"> - <classpath refid="test.classpath"/> - </javac> - </target> - - <!-- ================================================================== --> - <!-- Run unit tests --> - <!-- ================================================================== --> - <target name="test" depends="compile-test" if="test.available" - description="Run tests"> - - <junit printsummary="yes" haltonfailure="no" fork="yes" - errorProperty="tests.failed" failureProperty="tests.failed"> - <sysproperty key="test.data" value="${build.test}/data"/> - <sysproperty key="test.input" value="${root}/data"/> - <classpath refid="test.classpath"/> - <formatter type="plain" /> - <batchtest todir="${build.test}" 
unless="testcase"> - <fileset dir="${src.test}" - includes="**/Test*.java" excludes="**/${test.exclude}.java" /> - </batchtest> - <batchtest todir="${build.test}" if="testcase"> - <fileset dir="${src.test}" includes="**/${testcase}.java"/> - </batchtest> - </junit> - - <fail if="tests.failed">Tests failed!</fail> - - </target> - - <!-- ================================================================== --> - <!-- build war file --> - <!-- ================================================================== --> - <target name="war" depends="compile, compile-plugins" - description="Builds nutchwax war" > - <!--Copy our nutchwax nutch-site.xml template into the build dir as - nutch-site.xml. Then in the below, add it into the WEB-INF/classes dir. - --> - <war destfile="${build.dir}/${name}-webapp${nutchwax.version}.war" webxml="${this.web}/web.xml"> - <fileset dir="${nutch.web}/jsp"> - <exclude name="**/search.jsp"/> - <exclude name="**/web.xml"/> - <exclude name="**/refine*.xml"/> - <!--Don't copy these over until they jsp compile.--> - <exclude name="**/cluster.jsp"/> - <exclude name="**/refine-query*"/> - </fileset> - <fileset dir="${this.web}"> - <exclude name="**/web.xml"/> - </fileset> - <classes dir="${nutch.root}/conf" > - <exclude name="**/*.template"/> - </classes> - <classes dir="${root}/conf"> - <exclude name="**/*.template"/> - </classes> - <classes dir="${nutch.web}/locale"/> - <classes file="${this.web}/log4j.properties"/> - <lib dir="${root}/lib"> - <include name="archive-commons-*.jar" /> - </lib> - <lib dir="${nutch.root}/build"> - <include name="nutch*.jar"/> - </lib> - <lib dir="${nutch.root}/lib"> - <include name="lucene*.jar"/> - <include name="hadoop*.jar"/> - <include name="taglibs-*.jar"/> - <include name="dom4j-*.jar"/> - <include name="xerces-*.jar"/> - <include name="log4j-*.jar"/> - <include name="commons-lang-*.jar"/> - <include name="commons-cli-*.jar"/> - <include name="commons-logging-*.jar"/> - </lib> - <!--Copy into place the nutchwax 
classes.--> - <zipfileset prefix="WEB-INF/classes" - dir="${build.dir}/classes/" /> - - <!--Be selective about plugins to copy. Shrinks size of webapp. - --> - <zipfileset prefix="WEB-INF/classes/plugins" - dir="${nutch.root}/build/plugins"> - <include name="analysis-*/**" /> - <include name="clustering-*/**" /> - <include name="language-*/**" /> - <include name="lib-lucene-*/**" /> - <include name="lib-log4j-*/**" /> - <include name="lib-regex-*/**" /> - <include name="microformats-*/**" /> - <include name="nutch-*/**" /> - <include name="query-*/**" /> - <include name="urlfilter-*/**" /> - <include name="urlnormalizer-*/**" /> - <include name="summary-*/**" /> - <include name="urlfilter-*/**" /> - </zipfileset> - <zipfileset prefix="WEB-INF/classes/plugins" - dir="${build.dir}/wax-plugins"/> - <webinf dir="${nutch.root}/lib"> - <include name="taglibs-*.tld"/> - </webinf> - </war> - </target> - - - <!-- ================================================================== --> - <!-- Clean. Delete the build files, and their directories --> - <!-- ================================================================== --> - <target name="clean" description="Clean up all built"> - <delete dir="${build.dir}"/> - </target> - - <!-- ================================================================== --> - <!-- Clean all. 
Delete the build files including third-party builds --> - <!-- and their directories --> - <!-- ================================================================== --> - <target name="clean-all" - depends="clean,third.party.clean" - description="Clean up all built including third-party dependencies" /> - -</project> Modified: trunk/archive-access/projects/nutchwax/nutchwax-thirdparty/pom.xml =================================================================== --- trunk/archive-access/projects/nutchwax/nutchwax-thirdparty/pom.xml 2007-03-21 17:06:07 UTC (rev 1622) +++ trunk/archive-access/projects/nutchwax/nutchwax-thirdparty/pom.xml 2007-03-21 19:13:23 UTC (rev 1623) @@ -37,8 +37,10 @@ Done as part of the generate-sources step so that we can invoke it from eclipse. --> - <echo>Compiling third.party dependencies as part of generate-sources</echo> - <ant dir=".." target="third.party.jar"/> + <echo>Building nutch third-party dependency (jar)</echo> + <ant dir="../third-party/nutch" target="jar" inheritAll="false" > + <property name="build.compiler" value="extJavac" /> + </ant> <!--Copy over the nutch classes to target/classes so they can be found by later modules (target/classes is what maven has on its classpath when it goes to build subsequent modules). @@ -46,7 +48,10 @@ <copy todir="target/classes" overwrite="true"> <fileset dir="../third-party/nutch/build/classes" /> </copy> - <ant dir=".." target="third.party.plugins"/> + <echo>Building nutch third-party dependency (plugins)</echo> + <ant dir="../third-party/nutch" target="compile-plugins" inheritAll="false" > + <property name="build.compiler" value="extJavac" /> + </ant> </tasks> </configuration> <goals> @@ -58,7 +63,10 @@ <phase>clean</phase> <configuration> <tasks> - <ant dir=".." 
target="clean-all"/>
+ <echo>Cleaning nutch third-party dependency</echo>
+ <ant dir="../third-party/nutch" target="clean" inheritAll="false" >
+ <property name="build.compiler" value="extJavac" />
+ </ant>
  </tasks>
  </configuration>
  <goals>

Modified: trunk/archive-access/projects/nutchwax/src/main/assembly/distribution.xml
===================================================================
--- trunk/archive-access/projects/nutchwax/src/main/assembly/distribution.xml 2007-03-21 17:06:07 UTC (rev 1622)
+++ trunk/archive-access/projects/nutchwax/src/main/assembly/distribution.xml 2007-03-21 19:13:23 UTC (rev 1623)
@@ -17,8 +17,8 @@
  <directory>target</directory>
  <outputDirectory />
  <includes>
- <include>nutchwax*.jar</include>
- <include>nutchwax*.war</include>
+ <include>nutchwax-job/target/nutchwax-job*.jar</include>
+ <include>nutchwax-webapp/nutchwax-webapp*.war</include>
  </includes>
  </fileSet>
  <fileSet>

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|