1.
项目需求:1、分析文本文件的每个词语出现的频率;
2.
2、将最高频率的10个词打印出来。
3.
规格说明:要求文本文件大小在30KB~300KB之间。
4.
概要设计:将文本文件读取出来 → 读取过程中将每个词记录下来,并计数 → 找出计数最多的10个词输出
5.
实现步骤:
#include "stdio.h"
#include "stdlib.h"
#include "ctype.h"
#include "string.h"
#include <algorithm>
#include <ctime>
#include <fstream>
#include <iostream>
/* One entry of the word-frequency table. */
struct L
{
    char p[30]; /* the word, NUL-terminated (at most 29 characters) */
    int  n;     /* how many times the word has been seen */
};
/* Number of distinct words stored so far in the word table; read()
 * increments it each time it appends a new entry. */
int sum=0;
/* Reads the input text file, splits it into words and fills `word` with one
 * entry per distinct word together with its occurrence count. */
void read(struct L
word[]);
/* Reorders the entries of `word`.
 * NOTE(review): presumably by descending count over the first `sum`
 * entries - the comparison in the definition is truncated; confirm. */
void sort(struct L
word[]);
void main(){
struct L
word;
char
*a[50]={"i","the","and","a","to","of","he","you","it",
"we","us","after","there","are","is","from","me","them","about",
"was","were","on","in","your","that","all","him","his","at","up",
"out","but","for","this","didn't","very","or","with","over","around"
,"had","just","","have","too","always"};
read(word);
sort(word);
while(!strcmp(word.p,a)){
cout<<word.p<<endl);
}
cout<<"总运行时间:"<<(finish-start)/1000<<"s"<<endl;
}
}
/* Returns true for the ASCII letters a-z and A-Z only. */
static bool is_ascii_letter(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* Returns true when the two NUL-terminated words are equal, ignoring ASCII
 * letter case (portable replacement for the MSVC-only _stricmp). */
static bool same_word(const char *lhs, const char *rhs)
{
    while (*lhs != '\0' && *rhs != '\0') {
        if (tolower((unsigned char)*lhs) != tolower((unsigned char)*rhs)) {
            return false;
        }
        ++lhs;
        ++rhs;
    }
    return *lhs == *rhs;
}

/* Adds one occurrence of `token` to the table: bumps the count of an
 * existing entry, or appends a new entry while fewer than `capacity`
 * entries are in use. New words beyond the capacity are dropped. */
static void count_word(struct L word[], const char *token, int capacity)
{
    for (int i = 0; i < sum; ++i) {
        if (same_word(token, word[i].p)) {
            word[i].n++;
            return;
        }
    }
    if (sum < capacity) {
        strcpy(word[sum].p, token); /* token never exceeds sizeof p - 1 chars */
        word[sum].n = 1;
        sum++;
    }
}

/*
 * Reads "d:/try.txt", splits it into words (maximal runs of ASCII letters)
 * and records every distinct word, compared case-insensitively, in `word`
 * together with its occurrence count. The global `sum` is advanced for each
 * new entry. The spelling stored is that of the first occurrence.
 *
 * word: table to fill; the caller must provide room for at least
 *       300 * 1024 / 2 entries (the most words a 300 KB file can contain).
 *
 * Words longer than the entry buffer are truncated to its capacity. If the
 * file cannot be opened, "cannot open!" is printed and the table is left
 * untouched. On success the elapsed time of the scan is printed.
 */
void read(struct L
word[])
{
    enum { kMaxWords = 300 * 1024 / 2 };
    const size_t max_len = sizeof word[0].p - 1;

    const std::clock_t s = std::clock();

    std::ifstream in("d:/try.txt");
    if (!in) {
        std::cout << "cannot open!" << std::endl;
        return;
    }

    char temp[sizeof word[0].p];
    size_t len = 0;
    int c;
    do {
        c = in.get(); /* unformatted read: whitespace is not skipped */
        if (c != EOF && is_ascii_letter(c)) {
            /* Letters beyond the buffer capacity are discarded so that an
             * over-long word can never overflow temp. */
            if (len < max_len) {
                temp[len++] = (char)c;
            }
        } else if (len > 0) {
            /* A separator or end of file closes the current word. Empty
             * tokens (consecutive separators, EOF) are never recorded. */
            temp[len] = '\0';
            count_word(word, temp, kMaxWords);
            len = 0;
        }
    } while (c != EOF);

    const std::clock_t f = std::clock();
    std::cout << "读文件,分出单词并统计的时间:"
              << (double)(f - s) / CLOCKS_PER_SEC
              << "s" << std::endl;
    in.close();
}
void sort(struct L
word[])
{
struct L
temp;
for(int i=0;i
for(int j=0;j
if(word[j].n
{
strcpy(temp.p,word[j].p);
temp.n=word[j].n;
strcpy(word[j].p,word[j+1].p);
word[j].n=word[j+1].n;
strcpy(word[j+1].p,temp.p);
word[j+1].n=temp.n;
}
}
实验结果:
http://s5/mw690/003i71TNgy6NBKIeHWY44&690
性能分析:http://s16/mw690/001YxvFkgy6HsPAbZHp7f&690
http://s9/mw690/001YxvFkgy6HsPAgNxCc8&690
http://s8/mw690/004awxmYgy6MZ7sZMeX97&690
http://s3/mw690/004awxmYgy6MZ7xgDJg82&690
http://s5/mw690/004awxmYgy6MZ7xk8G824&690
从图中可以看出,main函数的已用非独占时间百分比几乎占用程序运行的整个时间,而其本身的已用独占时间百分比只占15%,执行main函数本身并不占用太多时间。read函数的已用非独占时间百分比为85%,是程序主要实现功能的部分,但其独占时间百分比仅为4.6%,而strcmp函数是read函数中实现本程序核心功能的函数,每读取一个单词将与前面已存储的单词依次进行比较,已用独占时间百分比为60%。
加载中,请稍候......