-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearchPagerank.c
93 lines (79 loc) · 3.22 KB
/
searchPagerank.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int number_of_urls(void); //get the number of urls
void geturls(char **links); //get the specific contents eg urlxx
int main(int argc, char *argv[])
{
int elements = number_of_urls();
int i, j;
int args = argc -1;
char **links = malloc(sizeof(char *) * elements); //allocate memory
for(i = 0; i < elements; i++)
links[i] = malloc(sizeof(char) * 20);
for(i = 0; i< elements; i++)
for(j = 0; j < 20; j++)
links[i][j] = '\0'; //fill '\0' to every links[i] (url)
geturls(links); //get every url from "pagerankList.txt
char words[argc-1][30]; //declare string array and the max length of word less than 30
for(i = 0; i < args; i++){ //the number of command arguments loops
strcpy(words[i],argv[i+1]); //copy string argv[i+1] to words[i]
strcat(words[i], " "); //append a copy of the source string to the end of destination string
}
FILE *word_urls = fopen("invertedIndex.txt", "r"); //open the file invertedIndex.txt
char temp_words[200];
char match_lines[args][100];
while(fgets(temp_words, 200, word_urls) != NULL)//It reads a line from the word_urls and stores it into temp_words by str
for(i = 0; i < args; i++)
if(!strncmp(words[i], temp_words, strlen(words[i]))){ //compare no more than the length of word[i] between words[i] and temp_words
strcpy(match_lines[i],temp_words); //if success, find the specific line from invertedIndex.txt and copy the temp_words to match_lines
}
int dic_urls[elements]; //initialise array which contains the number of urls elements
for(i = 0; i < elements; i++)
dic_urls[i] = 0;
//finds the first occurrence of the sub-string lins[j] in the string mathc_lines[i]
for(i = 0; i < args; i++){
for(j = 0; j < elements; j++){
if(strstr(match_lines[i], links[j]) != NULL){ //he word line which match specific url
dic_urls[j]++; //find the url, url++
}
}
}
int k,l;
if (elements<=30) {
for (l = args; l > 0; l--) { //in descending order
for (k = 0; k < elements; ++k) { //traverse every url
if (dic_urls[k] == l) {
printf("%s\n", links[k]);
}
}
}
}
fclose(word_urls); //close the file
return 0;
}
//count the number of url from pagerankList.txt
int number_of_urls()
{
FILE *page = fopen("pagerankList.txt", "r"); //open the file
char temp_store[200];
int elements = 0; //line =0
while(fgets(temp_store,200, page) != NULL){ //read by line
elements+=1; //line +1
}
fclose(page); //close the file
return elements;
}
//get every urlxx from pagerankList.txt
void geturls(char *links[])
{
FILE *furls = fopen("pagerankList.txt", "r");//open the file
char temp_storeg[200];
int i,j;
for(i = 0; fgets(temp_storeg,200, furls) != NULL; ++i){ //read by line
for(j = 0; temp_storeg[j] != ','; ++j) { //contents separated by ','
links[i][j] = temp_storeg[j]; //string array links = urlxx
}
}
fclose(furls); //close the file
}