程序大概如下
FileItem fileItem = (FileItem) fileItr.next();
// System.out.println(item.getString());
if (!fileItem.isFormField()) {
String name = fileItem.getName();
try {
File file = new File(saveDir + name);



//上传的文件
Document document = new Document();
document.setDocLocation(saveDir);
document.setDocName(file.getName());

//向数据库中存储document
//documentDB.InsertDocument(document);
session.persist(document);

//Long docId = document.getDocId();

fileItem.write(file);
// 从此处开始对文本内容进行处理
String content = CUPFileUtils.readFileContent(file);

//未分词的句子
String[] sentences = CUPStringUtils.spliteContentWithSentence(content);

//setContent分词后的内容
String segContent = CUPStringUtils.splitContent(content, path);

//分词后的分句
//不能这样用,JE分词后标点就被删掉了。
String[] segSentences = CUPStringUtils.spliteContentWithSentence(segContent);



//对插入关系进行批处理

//Transaction tx = session.beginTransaction();

//控制插入的数量
int index = 0;

//从每个句子中查找关系,有则存储到数据库中。
for(String sentence : sentences) {
sentence = CUPStringUtils.splitContent(sentence, path);

String[] words = sentence.split(" ");
TextRelation relation = CUPStringUtils.findRelation2(words, verbs, subjecs, objects);
if(relation != null) {
index++;
session.persist(relation);
document.getRelations().add(relation);

//每插入30个数据,刷新一下
/*if(index % 30 == 0) {
session.flush();
session.clear();
}*/
}
}


//重新开始计数
index = 0;

//Transaction tx2 = session.beginTransaction();


// 统计词频
Map<String, Integer> map = CUPStringUtils
.countWords(segContent);
// 对结果排序
Map<String, Integer> sortMap = CUPStringUtils
.sortMap(map);

//全部的词频
int totalWords = map.get("#全部词频");

for(String word : sortMap.keySet()) {
double weigh = (double)sortMap.get(word)/totalWords;

//词频大于10或者占总词数的百分之一,就看做是高频词
if(sortMap.get(word) > 10 || weigh>0.01) {

KeyWord keyWord = new KeyWord();



index ++;

//设置词性
if(wordList.containsKey(word)) {
keyWord.setWordMark(wordList.get(word));
//假如词性为名词,则把含有名词的句子存入数据库
if(wordList.get(word).equals("n") || wordList.get(word).equals("N")) {

Set<String> ss = CUPStringUtils.sentencesWithKeyWord(word, sentences);



keyWord.getSentences().addAll(ss);
}
} else{
keyWord.setWordMark("未知词性");
}

keyWord.setWord(word);
keyWord.setFrequency(sortMap.get(word));

keyWord.setWeigh(weigh);

//关键词存储到数据库
session.persist(keyWord);

document.getKeywordSet().add(keyWord);
/*if(index % 30 == 0) {
session.flush();
session.clear();
}*/
}
}
tx.commit();
//tx2.commit();
HibernateSessionFactory.closeSession();
System.out.println("提取知识结束========");
程序运行时,文本比较短的情况下通常不出问题,但有时也会出问题,异常如下:
org.hibernate.exception.GenericJDBCException: could not insert collection: [edu.cup.text2onto.bean.KeyWord.sentences#30]
at org.hibernate.exception.SQLStateConverter.handledNonSpecificException(SQLStateConverter.java:103)
at org.hibernate.exception.SQLStateConverter.convert(SQLStateConverter.java:91)
at org.hibernate.exception.JDBCExceptionHelper.convert(JDBCExceptionHelper.java:43)
at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1183)
at org.hibernate.action.CollectionRecreateAction.execute(CollectionRecreateAction.java:26)
at org.hibernate.engine.ActionQueue.execute(ActionQueue.java:250)
at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:234)
at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:145)
at org.hibernate.event.def.AbstractFlushingEventListener.performExecutions(AbstractFlushingEventListener.java:298)
at org.hibernate.event.def.DefaultFlushEventListener.onFlush(DefaultFlushEventListener.java:27)
at org.hibernate.impl.SessionImpl.flush(SessionImpl.java:1000)
at org.hibernate.impl.SessionImpl.managedFlush(SessionImpl.java:338)
at org.hibernate.transaction.JDBCTransaction.commit(JDBCTransaction.java:106)
at edu.cup.upload.UploadFile.doPost(UploadFile.java:217)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:637)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:290)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at filters.SetCharacterEncodingFilter.doFilter(SetCharacterEncodingFilter.java:122)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:298)
at org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:852)
at org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:588)
at org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489)
at java.lang.Thread.run(Thread.java:619)
Caused by: java.sql.BatchUpdateException: Incorrect string value: '\xE2\x80\x94\xE2\x80\x94...' for column 'sentence' at row 1
at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1666)
at com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1082)
at org.hibernate.jdbc.BatchingBatcher.doExecuteBatch(BatchingBatcher.java:48)
at org.hibernate.jdbc.BatchingBatcher.addToBatch(BatchingBatcher.java:34)
at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1146)
... 27 more

解决方案 »

  1.   

    有时候也可能出现下面的异常:
    org.hibernate.exception.ConstraintViolationException: could not insert collection: [edu.cup.text2onto.bean.Document.keywordSet#2]
    at org.hibernate.exception.SQLStateConverter.convert(SQLStateConverter.java:71)
    at org.hibernate.exception.JDBCExceptionHelper.convert(JDBCExceptionHelper.java:43)
    at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1183)
    at org.hibernate.action.CollectionRecreateAction.execute(CollectionRecreateAction.java:26)
    at org.hibernate.engine.ActionQueue.execute(ActionQueue.java:250)
    at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:234)
    at org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:145)
    at org.hibernate.event.def.AbstractFlushingEventListener.performExecutions(AbstractFlushingEventListener.java:298)
    at org.hibernate.event.def.DefaultFlushEventListener.onFlush(DefaultFlushEventListener.java:27)
    at org.hibernate.impl.SessionImpl.flush(SessionImpl.java:1000)
    at org.hibernate.impl.SessionImpl.managedFlush(SessionImpl.java:338)
    at org.hibernate.transaction.JDBCTransaction.commit(JDBCTransaction.java:106)
    at edu.cup.upload.UploadFile.doPost(UploadFile.java:217)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:637)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
    at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:290)
    at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
    at filters.SetCharacterEncodingFilter.doFilter(SetCharacterEncodingFilter.java:122)
    at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235)
    at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
    at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
    at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
    at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
    at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
    at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
    at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:298)
    at org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:852)
    at org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:588)
    at org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489)
    at java.lang.Thread.run(Thread.java:619)
    Caused by: java.sql.BatchUpdateException: Duplicate entry '2-164' for key 'PRIMARY'
    at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1666)
    at com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1082)
    at org.hibernate.jdbc.BatchingBatcher.doExecuteBatch(BatchingBatcher.java:48)
    at org.hibernate.jdbc.BatchingBatcher.addToBatch(BatchingBatcher.java:34)
    at org.hibernate.persister.collection.AbstractCollectionPersister.recreate(AbstractCollectionPersister.java:1146)
    ... 27 more
      

  2.   

    Caused by: java.sql.BatchUpdateException: Duplicate entry '2-164' for key 'PRIMARY'
    主键重复(不是外键):关联表 doc_keyword 中 docid=2、wordid=164 这条记录被重复插入
      

  3.   

    程序如下  TextRelation.java
    public class TextRelation {
    private Long id;
    private String subject;
    private String predicate;
    private String object;
    }KeyWord.java
    public class KeyWord {
    private Long wordId;
    private String word;
    private String wordMark;
    private Integer frequency;
    private Double weigh;
    private Set<String> sentences = new HashSet<String>();Document.java
    public class Document {
    private Long docId;
    private String docLocation;
    private String docName;
    private Set<KeyWord> keywordSet = new HashSet<KeyWord>();
    private Set<TextRelation> relations = new HashSet<TextRelation>();

    配置文件如下 <class name="TextRelation" table="relation">
    <id name="id" column="relation_id">
    <generator class="native" />
    </id>

    <property name="subject" column="subject" type="string"
    not-null="true" />
    <property name="predicate" column="predicate" type="string"
    not-null="true" />
    <property name="object" column="object" type="string"
    not-null="true" />
    </class> <class name="KeyWord" table="keyword">
    <id name="wordId" column="wordid">
    <generator class="native" />
    </id>

    <property name="word" column="word" type="string"
    not-null="true" />

    <!-- wordMark (part-of-speech tag) must have its own column: the "word"
         property above already uses column "word", and mapping two writable
         properties to one column makes Hibernate fail with a repeated-column
         MappingException. NOTE(review): confirm the real column name in the
         keyword table. -->
    <property name="wordMark" column="wordMark" type="string"
    not-null="true" />

    <property name="frequency" column="frequency" type="integer"
    not-null="true" />

    <property name="weigh" column="weigh" type="double"
    not-null="true" />

    <set name="sentences" table="sentene_with_keyword">
    <key column="wordid"/>
    <element type="string" column="sentence"></element>
    </set>
    </class> <class name="Document" table="document">
    <id name="docId" column="docid">
    <generator class="native" />
    </id> <property name="docName" column="docName" type="string"
    not-null="true" />
    <property name="docLocation" column="docLocation" type="string"
    not-null="true" />

    <set name="keywordSet" table="doc_keyword">
    <key column="docid"></key>
    <many-to-many column="wordid"  
    unique="true"
    class="KeyWord"/>
    </set>

    <set name="relations" table="doc_relation">
    <key column="docid"></key>
    <many-to-many  column="relationid"
    unique="true"
    class="TextRelation"/>
    </set>

    </class>