主要是对一篇很长的文章,统计每个单词出现的次数,请大家给个效率比较高的方法,谢谢了!
问题补充:用vc给个具体的实现代码,假设这篇文章的每个单词已经存储到数组里了,再假设这篇文章共计20000个单词。 

解决方案 »

  1.   

    我会选择用正则匹配整个字符串。如果是数组,先排序,然后从头到尾扫一遍就可以搞定。也可以把数组每个成员放到hashtable里,最后看hashtable的大小。或者导入数据库,用select distinct。
      

  2.   

    用一个Node型的数组临时存储从文件中读入的单词,然后生成一个排序二叉树,中序遍历输出二叉树中节点的值:
    #include <stdio.h>
    #include<string.h>
    /*
     * Temporary storage for one word read from the input file.
     * main() keeps an array of these and fills one element per word.
     */
    struct Node
    {
        char a[8]; /* word characters; main() zero-fills the array first and
                      stores characters with lowercase letters converted to
                      uppercase. Characters past the 8th of a word are dropped.
                      NOTE(review): a word of 8 or more characters fills all
                      8 bytes and leaves no terminating NUL, yet main() passes
                      this array to strcmp() -- confirm and widen if needed. */
    }; /* node used for temporary data storage */
    /*
     * Node of the binary tree that main() builds from the words held in the
     * temporary Node array.
     */
    struct Tree
    {
        char b[8];          /* the word: main() copies all 8 bytes of Node.a here */
        int w;              /* occurrence counter: set to 1 when the node is created,
                               incremented when an equal word is encountered */
        struct Tree *left;  /* subtree taken when strcmp(this->b, word) > 0 */
        struct Tree *right; /* subtree taken when strcmp(this->b, word) < 0 */
    };
    void main()
    {
        FILE *fp;
        struct Node m[400];
        struct Tree *root,*p,*q;
    char filename[40],ch;
    int i,j,k,l;
    root=(struct Tree *)malloc (sizeof(struct Tree));
    root=NULL;
    for(i=0;i<400;i++)
    {
        for(j=0;j<8;j++)
        {
            m[i].a[j]=NULL;
         }
    }
    printf("Please input the name of file: ");
    scanf("%s", filename);
    fflush(stdin);
    if((fp=fopen(filename, "r")) == NULL)
    {
        printf("Cannot open the file.\n");
        exit(0);
    }
    else
    {
        i=0;
        j=0;
        ch=fgetc(fp);
        while (ch!=EOF)
        {
            if(((ch<=122)&&(ch>=65))||(ch=' '))
            {
                   if(i>=8)
                  {
                    if(ch==' ')
                    {
                           i=0;
                           j++;
                           ch=fgetc(fp);
                     }
                    else
                    {
                        ch=fgetc(fp);
                        continue;
                     }
                   }
                   if(ch==' ')
                   {
                       if(m[j].a[0]!=NULL)
                       {j++;}
                       i=0;
                       ch=fgetc(fp);
                   }
                   else
                   {
                       if((ch>=97)&&(ch<=122))
                       {ch=ch-32;}
                       m[j].a[i]=ch;
                       i++;
                       ch=fgetc(fp);
                   }
               }
               else
               {
                    ch=fgetc(fp);
                    continue;
               }
           }
        }  /*上面部分是实现文件的输入,基本上没什么问题*/
        l=j;/*保存节点数*/
        for(i=0;i<l;i++)   /*下部分是生成二叉树*/
        {        if(m[i].a[0]==NULL)
            {break;}    /*如果临时存放数据的数组的数据域为空,则二叉树生成结束*/        else
            {
               p=(struct Tree *)malloc(sizeof(struct Tree));
               p->left=NULL;
               p->right=NULL;
               p->w=1;           for(k=0;k<8;k++)
               {
                   p->b[k]=m[i].a[k];
               }           if(root==NULL)
               {root=p;}           else
               {
                   q=root;
                   while(q!=NULL)
                   {
                       if((strcmp(q->b,m[i].a))>0)
                       {
                         if((q->left)!=NULL)
                         {
                           q=q->left;
                           if((strcmp(q->b,m[i].a))==0)
                           {
                              q->w=q->w+1;
                              q=NULL;
                           }
                          }
                         else
                         {
                            q->left=p;
                            q=NULL;
                         }
                       }                   if((strcmp(q->b,m[i].a))<0)
                       {
                          if((q->right)!=NULL)
                          {
                             q=q->right;
                             if((strcmp(q->b,m[i].a))==0)
                             {
                                q->w=q->w+1;
                                q=NULL;
                             }
                          }
                          else
                          {
                             q->right=p;
                             q=NULL;
                          }
                       }                  else
                       {
                          q->w=q->w+1;
                          q=NULL;
                       }
                   }
               }
            }
        }
        fclose(fp);