It looks like you're new here. Sign in or register to get started.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process with a single "Crawl Web 5.X" operator (class web:crawl_web).
  Per the parameter values below, the crawl starts at https://altair.com/, is capped at
  100 pages and depth 1, and writes pages into files (extension "txt") under C:/.
  The operator's "Example Set" port is wired to the process port "result 1".

  NOTE(review): the XML declaration and the <process> start tag were missing from the
  pasted snippet, leaving the closing </process> unmatched. The version value is assumed
  to equal the root operator's compatibility (10.3.000); confirm against the original export.
-->
<process version="10.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="10.3.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true"
                class="web:crawl_web"
                compatibility="10.0.000"
                expanded="true"
                height="68"
                name="Crawl Web 5.X"
                width="90"
                x="246"
                y="187">
        <parameter key="url" value="https://altair.com/"/>
        <list key="crawling_rules"/>
        <parameter key="write_pages_into_files" value="true"/>
        <parameter key="add_pages_as_attribute" value="false"/>
        <parameter key="output_dir" value="C:/"/>
        <parameter key="extension" value="txt"/>
        <parameter key="max_pages" value="100"/>
        <parameter key="max_depth" value="1"/>
        <parameter key="domain" value="server"/>
        <parameter key="delay" value="1000"/>
        <parameter key="max_threads" value="1"/>
        <parameter key="max_page_size" value="500"/>
        <parameter key="user_agent" value="rapid-miner-crawler"/>
        <parameter key="obey_robot_exclusion" value="true"/>
        <parameter key="really_ignore_exclusion" value="false"/>
      </operator>
      <connect from_op="Crawl Web 5.X" from_port="Example Set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Answers
- Exception: java.lang.NoClassDefFoundError
- Message: org/apache/tika/parser/html/HtmlParser
- Stack trace:
- edu.uci.ics.crawler4j.parser.TikaHtmlParser.<init>(TikaHtmlParser.java:34)
- edu.uci.ics.crawler4j.parser.Parser.<init>(Parser.java:42)
- edu.uci.ics.crawler4j.crawler.CrawlController.<init>(CrawlController.java:85)
- com.rapidminer.operator.web.crawler.CrawlerOperator.doWork(CrawlerOperator.java:269)
- com.rapidminer.operator.Operator.execute(Operator.java:1024)
- com.rapidminer.operator.execution.SimpleUnitExecutor.execute(SimpleUnitExecutor.java:77)
- com.rapidminer.operator.ExecutionUnit$2.run(ExecutionUnit.java:804)
- com.rapidminer.operator.ExecutionUnit$2.run(ExecutionUnit.java:799)
- java.base/java.security.AccessController.doPrivileged(Native Method)
- com.rapidminer.operator.ExecutionUnit.execute(ExecutionUnit.java:799)
- com.rapidminer.operator.OperatorChain.doWork(OperatorChain.java:423)
- com.rapidminer.operator.Operator.execute(Operator.java:1024)
- com.rapidminer.Process.executeRoot(Process.java:1476)
- com.rapidminer.Process.lambda$executeRootInPool$5(Process.java:1452)
- com.rapidminer.studio.concurrency.internal.AbstractConcurrencyContext$AdaptedCallable.exec(AbstractConcurrencyContext.java:362)
- java.base/java.util.concurrent.ForkJoinTask.doExec(Unknown Source)
- java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(Unknown Source)
- java.base/java.util.concurrent.ForkJoinPool.scan(Unknown Source)
- java.base/java.util.concurrent.ForkJoinPool.runWorker(Unknown Source)
- java.base/java.util.concurrent.ForkJoinWorkerThread.run(Unknown Source)
It would be great if you could help me resolve this issue. Thanks in advance.
Thank you.