程序大概如下
// Handles one uploaded item: writes the file to disk, persists a Document,
// extracts a relation from each sentence and the high-frequency keywords,
// then commits the transaction and closes the Hibernate session.
// NOTE(review): the `if` and `try` opened below are not closed in this excerpt.
FileItem fileItem = (FileItem) fileItr.next();
// System.out.println(item.getString());
if (!fileItem.isFormField()) {
String name = fileItem.getName();
try {
File file = new File(saveDir + name);
// The uploaded file
Document document = new Document();
document.setDocLocation(saveDir);
document.setDocName(file.getName());
// Store the document in the database
//documentDB.InsertDocument(document);
session.persist(document);
//Long docId = document.getDocId();
fileItem.write(file);
// Text-content processing starts here
String content = CUPFileUtils.readFileContent(file);
// Sentences before word segmentation
String[] sentences = CUPStringUtils.spliteContentWithSentence(content);
// Content after word segmentation
String segContent = CUPStringUtils.splitContent(content, path);
// Sentences split from the segmented content
// This cannot be used this way: JE segmentation strips the punctuation.
// NOTE(review): segSentences is not used anywhere in the visible code.
String[] segSentences = CUPStringUtils.spliteContentWithSentence(segContent);
// Batch the relation inserts
//Transaction tx = session.beginTransaction();
// Counts the inserts (intended for batch-size control)
int index = 0;
// Look for a relation in every sentence; persist it when one is found.
for(String sentence : sentences) {
sentence = CUPStringUtils.splitContent(sentence, path);
String[] words = sentence.split(" ");
TextRelation relation = CUPStringUtils.findRelation2(words, verbs, subjecs, objects);
if(relation != null) {
index++;
session.persist(relation);
document.getRelations().add(relation);
// Flush after every 30 inserts (currently disabled)
/*if(index % 30 == 0) {
session.flush();
session.clear();
}*/
}
}
// Restart the counter
index = 0;
//Transaction tx2 = session.beginTransaction();
// Count word frequencies
Map<String, Integer> map = CUPStringUtils
.countWords(segContent);
// Sort the result
Map<String, Integer> sortMap = CUPStringUtils
.sortMap(map);
// Total word count
// NOTE(review): unboxing throws NullPointerException if the "#全部词频" key is absent — confirm countWords always sets it.
int totalWords = map.get("#全部词频");
for(String word : sortMap.keySet()) {
double weigh = (double)sortMap.get(word)/totalWords;
// A word is treated as high-frequency when it occurs more than 10 times
// or accounts for more than 1% of all words.
if(sortMap.get(word) > 10 || weigh>0.01) {
KeyWord keyWord = new KeyWord();
index ++;
// Set the part of speech
if(wordList.containsKey(word)) {
keyWord.setWordMark(wordList.get(word));
// If the word is a noun, store the sentences that contain it in the database
if(wordList.get(word).equals("n") || wordList.get(word).equals("N")) {
Set<String> ss = CUPStringUtils.sentencesWithKeyWord(word, sentences);
keyWord.getSentences().addAll(ss);
}
} else{
keyWord.setWordMark("未知词性");
}
keyWord.setWord(word);
keyWord.setFrequency(sortMap.get(word));
keyWord.setWeigh(weigh);
// Persist the keyword
session.persist(keyWord);
document.getKeywordSet().add(keyWord);
/*if(index % 30 == 0) {
session.flush();
session.clear();
}*/
}
}
// NOTE(review): tx is not declared in this excerpt (the beginTransaction line above is
// commented out) — presumably it is begun earlier in the method; confirm.
tx.commit();
//tx2.commit();
HibernateSessionFactory.closeSession();
System.out.println("提取知识结束========");
程序运行时,如果文本比较短,一般不会出问题,但有时也会出错,异常如下:
org.hibernate.exception.GenericJDBCException: could not insert collection: [edu.cup.text2onto.bean.KeyWord.sentences#30]
at org.hibernate.exception.SQLStateConverter.handledNonSpecificException(SQLStateConverter.java:103)
at org.hibernate.exception.SQLStateConverter.convert(SQLStateConverter.java:91)
at org.hibernate.exception.JDBCExceptionHelper.convert(JDBCExceptionHelper.java:43)
at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1183)
at org.hibernate.action.CollectionRecreateAction.execute(CollectionRecreateAction.java:26)
at org.hibernate.engine.ActionQueue.execute(ActionQueue.java:250)
at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:234)
at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:145)
at org.hibernate.event.def.AbstractFlushingEventListener.performExecutions(AbstractFlushingEventListener.java:298)
at org.hibernate.event.def.DefaultFlushEventListener.onFlush(DefaultFlushEventListener.java:27)
at org.hibernate.impl.SessionImpl.flush(SessionImpl.java:1000)
at org.hibernate.impl.SessionImpl.managedFlush(SessionImpl.java:338)
at org.hibernate.transaction.JDBCTransaction.commit(JDBCTransaction.java:106)
at edu.cup.upload.UploadFile.doPost(UploadFile.java:217)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:637)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:290)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at filters.SetCharacterEncodingFilter.doFilter(SetCharacterEncodingFilter.java:122)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:298)
at org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:852)
at org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:588)
at org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489)
at java.lang.Thread.run(Thread.java:619)
Caused by: java.sql.BatchUpdateException: Incorrect string value: '\xE2\x80\x94\xE2\x80\x94...' for column 'sentence' at row 1
at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1666)
at com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1082)
at org.hibernate.jdbc.BatchingBatcher.doExecuteBatch(BatchingBatcher.java:48)
at org.hibernate.jdbc.BatchingBatcher.addToBatch(BatchingBatcher.java:34)
at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1146)
... 27 more
// Handles one uploaded item: writes the file to disk, persists a Document,
// extracts a relation from each sentence and the high-frequency keywords,
// then commits the transaction and closes the Hibernate session.
// NOTE(review): the `if` and `try` opened below are not closed in this excerpt.
FileItem fileItem = (FileItem) fileItr.next();
// System.out.println(item.getString());
if (!fileItem.isFormField()) {
String name = fileItem.getName();
try {
File file = new File(saveDir + name);
// The uploaded file
Document document = new Document();
document.setDocLocation(saveDir);
document.setDocName(file.getName());
// Store the document in the database
//documentDB.InsertDocument(document);
session.persist(document);
//Long docId = document.getDocId();
fileItem.write(file);
// Text-content processing starts here
String content = CUPFileUtils.readFileContent(file);
// Sentences before word segmentation
String[] sentences = CUPStringUtils.spliteContentWithSentence(content);
// Content after word segmentation
String segContent = CUPStringUtils.splitContent(content, path);
// Sentences split from the segmented content
// This cannot be used this way: JE segmentation strips the punctuation.
// NOTE(review): segSentences is not used anywhere in the visible code.
String[] segSentences = CUPStringUtils.spliteContentWithSentence(segContent);
// Batch the relation inserts
//Transaction tx = session.beginTransaction();
// Counts the inserts (intended for batch-size control)
int index = 0;
// Look for a relation in every sentence; persist it when one is found.
for(String sentence : sentences) {
sentence = CUPStringUtils.splitContent(sentence, path);
String[] words = sentence.split(" ");
TextRelation relation = CUPStringUtils.findRelation2(words, verbs, subjecs, objects);
if(relation != null) {
index++;
session.persist(relation);
document.getRelations().add(relation);
// Flush after every 30 inserts (currently disabled)
/*if(index % 30 == 0) {
session.flush();
session.clear();
}*/
}
}
// Restart the counter
index = 0;
//Transaction tx2 = session.beginTransaction();
// Count word frequencies
Map<String, Integer> map = CUPStringUtils
.countWords(segContent);
// Sort the result
Map<String, Integer> sortMap = CUPStringUtils
.sortMap(map);
// Total word count
// NOTE(review): unboxing throws NullPointerException if the "#全部词频" key is absent — confirm countWords always sets it.
int totalWords = map.get("#全部词频");
for(String word : sortMap.keySet()) {
double weigh = (double)sortMap.get(word)/totalWords;
// A word is treated as high-frequency when it occurs more than 10 times
// or accounts for more than 1% of all words.
if(sortMap.get(word) > 10 || weigh>0.01) {
KeyWord keyWord = new KeyWord();
index ++;
// Set the part of speech
if(wordList.containsKey(word)) {
keyWord.setWordMark(wordList.get(word));
// If the word is a noun, store the sentences that contain it in the database
if(wordList.get(word).equals("n") || wordList.get(word).equals("N")) {
Set<String> ss = CUPStringUtils.sentencesWithKeyWord(word, sentences);
keyWord.getSentences().addAll(ss);
}
} else{
keyWord.setWordMark("未知词性");
}
keyWord.setWord(word);
keyWord.setFrequency(sortMap.get(word));
keyWord.setWeigh(weigh);
// Persist the keyword
session.persist(keyWord);
document.getKeywordSet().add(keyWord);
/*if(index % 30 == 0) {
session.flush();
session.clear();
}*/
}
}
// NOTE(review): tx is not declared in this excerpt (the beginTransaction line above is
// commented out) — presumably it is begun earlier in the method; confirm.
tx.commit();
//tx2.commit();
HibernateSessionFactory.closeSession();
System.out.println("提取知识结束========");
程序运行时,如果文本比较短,一般不会出问题,但有时也会出错,异常如下:
org.hibernate.exception.GenericJDBCException: could not insert collection: [edu.cup.text2onto.bean.KeyWord.sentences#30]
at org.hibernate.exception.SQLStateConverter.handledNonSpecificException(SQLStateConverter.java:103)
at org.hibernate.exception.SQLStateConverter.convert(SQLStateConverter.java:91)
at org.hibernate.exception.JDBCExceptionHelper.convert(JDBCExceptionHelper.java:43)
at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1183)
at org.hibernate.action.CollectionRecreateAction.execute(CollectionRecreateAction.java:26)
at org.hibernate.engine.ActionQueue.execute(ActionQueue.java:250)
at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:234)
at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:145)
at org.hibernate.event.def.AbstractFlushingEventListener.performExecutions(AbstractFlushingEventListener.java:298)
at org.hibernate.event.def.DefaultFlushEventListener.onFlush(DefaultFlushEventListener.java:27)
at org.hibernate.impl.SessionImpl.flush(SessionImpl.java:1000)
at org.hibernate.impl.SessionImpl.managedFlush(SessionImpl.java:338)
at org.hibernate.transaction.JDBCTransaction.commit(JDBCTransaction.java:106)
at edu.cup.upload.UploadFile.doPost(UploadFile.java:217)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:637)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:290)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at filters.SetCharacterEncodingFilter.doFilter(SetCharacterEncodingFilter.java:122)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:298)
at org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:852)
at org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:588)
at org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489)
at java.lang.Thread.run(Thread.java:619)
Caused by: java.sql.BatchUpdateException: Incorrect string value: '\xE2\x80\x94\xE2\x80\x94...' for column 'sentence' at row 1
at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1666)
at com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1082)
at org.hibernate.jdbc.BatchingBatcher.doExecuteBatch(BatchingBatcher.java:48)
at org.hibernate.jdbc.BatchingBatcher.addToBatch(BatchingBatcher.java:34)
at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1146)
... 27 more
解决方案 »
- <Loader delegate="true"/>的作用?
- strut2国际化(受不了了)
- web service异常问题
- 庆国庆,6位QQ号送........
- servletContext.log("asdfasdf");说是输出到了容器,请问到底输出到哪了
- websphere配置数据源已经成功,怎么样用jsp或者servlet连接数据库,在线等待
- ant编译脚本的错误"Default target 'dist' does not exists in project"
- 如何让EntityBean只load一次!
- 在java中,怎么得到局域网络上的所有机器名和网络上可用的oracle数据库实例名
- 关于jFinal的求助
- SSH配置出错,跪求各位大侠给个解决方案
- Java如何操作dbf格式数据表中的字段
org.hibernate.exception.ConstraintViolationException: could not insert collection: [edu.cup.text2onto.bean.Document.keywordSet#2]
at org.hibernate.exception.SQLStateConverter.convert(SQLStateConverter.java:71)
at org.hibernate.exception.JDBCExceptionHelper.convert(JDBCExceptionHelper.java:43)
at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1183)
at org.hibernate.action.CollectionRecreateAction.execute(CollectionRecreateAction.java:26)
at org.hibernate.engine.ActionQueue.execute(ActionQueue.java:250)
at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:234)
at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:145)
at org.hibernate.event.def.AbstractFlushingEventListener.performExecutions(AbstractFlushingEventListener.java:298)
at org.hibernate.event.def.DefaultFlushEventListener.onFlush(DefaultFlushEventListener.java:27)
at org.hibernate.impl.SessionImpl.flush(SessionImpl.java:1000)
at org.hibernate.impl.SessionImpl.managedFlush(SessionImpl.java:338)
at org.hibernate.transaction.JDBCTransaction.commit(JDBCTransaction.java:106)
at edu.cup.upload.UploadFile.doPost(UploadFile.java:217)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:637)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:290)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at filters.SetCharacterEncodingFilter.doFilter(SetCharacterEncodingFilter.java:122)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:298)
at org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:852)
at org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:588)
at org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489)
at java.lang.Thread.run(Thread.java:619)
Caused by: java.sql.BatchUpdateException: Duplicate entry '2-164' for key 'PRIMARY'
at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1666)
at com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1082)
at org.hibernate.jdbc.BatchingBatcher.doExecuteBatch(BatchingBatcher.java:48)
at org.hibernate.jdbc.BatchingBatcher.addToBatch(BatchingBatcher.java:34)
at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1146)
... 27 more
外键重复:关联表 doc_keyword 中出现了重复的 (docid, wordid) 组合,导致主键冲突(Duplicate entry '2-164' for key 'PRIMARY')。
// Bean holding one extracted relation as a subject / predicate / object triple.
// NOTE(review): accessors are not shown in this excerpt.
public class TextRelation {
// Identifier of the relation
private Long id;
// Subject part of the relation
private String subject;
// Predicate part of the relation
private String predicate;
// Object part of the relation
private String object;
}KeyWord.java
public class KeyWord {
private Long wordId;
private String word;
private String wordMark;
private Integer frequency;
private Double weigh;
private Set<String> sentences = new HashSet<String>();Document.java
public class Document {
private Long docId;
private String docLocation;
private String docName;
private Set<KeyWord> keywordSet = new HashSet<KeyWord>();
private Set<TextRelation> relations = new HashSet<TextRelation>();
配置文件如下 <class name="TextRelation" table="relation">
<id name="id" column="relation_id">
<generator class="native" />
</id>
<property name="subject" column="subject" type="string"
not-null="true" />
<property name="predicate" column="predicate" type="string"
not-null="true" />
<property name="object" column="object" type="string"
not-null="true" />
</class> <class name="KeyWord" table="keyword">
<id name="wordId" column="wordid">
<generator class="native" />
</id>
<property name="word" column="word" type="string"
not-null="true" />
<!-- wordMark holds the part-of-speech tag of the keyword. It previously reused
     column "word", which is already mapped by the "word" property; Hibernate
     rejects a column that is mapped twice in one entity. The column name
     "wordmark" is an assumption and must match the actual keyword table schema. -->
<property name="wordMark" column="wordmark" type="string"
not-null="true" />
<property name="frequency" column="frequency" type="integer"
not-null="true" />
<property name="weigh" column="weigh" type="double"
not-null="true" />
<set name="sentences" table="sentene_with_keyword">
<key column="wordid"/>
<element type="string" column="sentence"></element>
</set>
</class> <class name="Document" table="document">
<id name="docId" column="docid">
<generator class="native" />
</id> <property name="docName" column="docName" type="string"
not-null="true" />
<property name="docLocation" column="docLocation" type="string"
not-null="true" />
<set name="keywordSet" table="doc_keyword">
<key column="docid"></key>
<many-to-many column="wordid"
unique="true"
class="KeyWord"/>
</set>
<set name="relations" table="doc_relation">
<key column="docid"></key>
<many-to-many column="relationid"
unique="true"
class="TextRelation"/>
</set>
</class>