Если это поможет, вот код:
#define NDEBUG
//#define TIME
//#define WRITE
//#define DEBUG
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <pthread.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <time.h>
/* Windows-System */
#ifdef _WIN32
#include <winsock.h>
#include <io.h>
/* Unix-System */
#else
#include <sys/socket.h>
#include <sys/types.h>
#include <pcreposix.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#endif
/* TCP port the worker threads connect to. */
#define HTTP_PORT 80
/* Number of site slots: sizes Status[] and the thread array in main(). */
#define NUM_SITES 46
/* Size of the tag-name arrays taken by RasborXML/RasborHTML; also passed
   as their kol_teg argument by work_funct. */
#define MAX_NUM_TEG 5
/* NOTE(review): not referenced in the visible part of the file. */
#define BUFF_CONTEXT 4096
/* Number of failed connect() calls work_funct tolerates before giving up. */
#define NUM_CONNECTIONS 10
/* Number of NAMES_OF_TEGS_IS_NOT_FOUND results after which work_funct gives up. */
#define NUM_NoT 3
/* Number of PARSE_FAIL results after which work_funct gives up. */
#define NUM_PF 3
/* Error code definitions. work_funct compares the parser return value against
   NAMES_OF_TEGS_IS_NOT_FOUND and PARSE_FAIL; the remaining codes are not
   referenced in the visible part of the file (presumably used by the
   parsers -- confirm). */
#define NAMES_OF_TEGS_IS_NOT_FOUND 1
#define PARSE_FAIL 2
#define ERR_REGCOMP 11
#define ERR_REGEXEC 12
#define MATCHPTR_RM_EO_EQ_MINUS_1 13
#define ERR_PTRNULL 31
#define ERR_IMPOSSIBLE 34
#define ERR_REGNOTFOUND 35
/* Per-site flag: main() sets Status[j] to 1 for every site named on the
   command line; only flagged sites get a command string and a thread. */
int Status[NUM_SITES] = {0};
/* NOTE(review): main() destroys this mutex, but no initialisation or
   lock/unlock is visible in this part of the file -- confirm where it is used. */
pthread_mutex_t flag;
/* Timeout in seconds; main() overwrites it from argv[4] and work_funct
   passes it to alarm(). */
double TIME_OUT = 2;
/* Thread body: fetches and parses the site whose index *i points to. */
void work_funct (void *i);
/* Parsers called by work_funct on the received response; they return 0 on
   success or one of the error codes above. */
int RasborXML (const char *teg[MAX_NUM_TEG], const int kol_teg, const char *xml, const int j);
int RasborHTML (const char *teg[MAX_NUM_TEG], const int kol_teg, char *html, const int j);
/* NOTE(review): not called in the visible part of the file. */
int str_repl_reg_null (char *str, const char *pattern);
/* Adapter with the exact start-routine signature pthread_create() requires,
   so work_funct is never called through a mismatched function pointer. */
static void *work_funct_thread (void *arg)
{
    work_funct (arg);
    return NULL;
}

/* Returns value when the query-parameter name is non-empty, otherwise "". */
static const char *value_if_named (const char *name, const char *value)
{
    return (name[0] == '\0') ? "" : value;
}

/*
 * Entry point.
 *
 * Expected arguments (argc must be 5 + 4*k with k >= 2):
 *   argv[1]  keyword, argv[2] IP, argv[3] sub id, argv[4] timeout in seconds,
 *   argv[5], argv[6]  host names stored in hostnames[25] and hostnames[26],
 *   argv[7], argv[8]  values substituted into the base string of site 9,
 *   argv[9...]        groups of four: site identifier, site id, second site
 *                     id, number of results.
 *
 * Builds the request path for every selected site, starts one thread per
 * selected site and waits for all of them.
 *
 * Returns 0 on success and -1 when the arguments are malformed or the
 * mutex cannot be initialised.
 */
int main (int argc, char *argv[])
{
    if (((argc - 5) % 4) || (argc < 13))
    {
        fprintf (stderr, "Notice: Incorrect agguments "
                         "for parser. Contact to the developers.\n");
        return (-1);
    }

    /* snprintf always terminates the copy; strncpy left a 32-character
       (or longer) argument without a terminating NUL. */
    snprintf (hostnames[25], 32, "%s", argv[5]);
    snprintf (hostnames[26], 32, "%s", argv[6]);

    TIME_OUT = atof (argv[4]);
    /* work_funct converts TIME_OUT to unsigned for alarm(); a negative (or
       NaN) value would make that conversion undefined. */
    if (!(TIME_OUT >= 0))
        TIME_OUT = 0;

    /* The mutex is destroyed at the end of main, so it must be initialised
       before any thread can touch it. */
    if (pthread_mutex_init (&flag, NULL) != 0)
    {
        fprintf (stderr, "pthread_mutex_init() failed.\n");
        return (-1);
    }

    pthread_t thread[NUM_SITES];
    int started[NUM_SITES] = {0};   /* 1 where pthread_create succeeded */
    int i, j;

    for (i = 0; i < NUM_SITES; i++)
        indexes[i] = i;

    /* Match every site identifier on the command line against the table. */
    for (i = 9; i < argc; i += 4)
        for (j = 0; j < NUM_SITES; j++)
            if (!strcmp (SiteIdentificators[j], argv[i]))
            {
                Status[j] = 1;
                /* "%s" keeps command-line text from being interpreted as a
                   format string.
                   NOTE(review): the destination sizes are not visible here,
                   so these copies are still unbounded. */
                sprintf (Site_id[j], "%s", argv[i + 1]);
                sprintf (Site_id2[j], "%s", argv[i + 2]);
                sprintf (Num_results[j], "%s", argv[i + 3]);
                break;
            }

    /* Sites whose base string depends on run-time values. */
    if (Status[9])
    {
        sprintf (BaseStrings[9], "/feed/xml.aspx?pn=1&filter=no&st=%s&rid=%s&",
                 argv[7], argv[8]);
    }
    if (Status[28])
    {
        sprintf (BaseStrings[28], "%s%s%s", SubArrayBeg[0], Site_id[28], SubArrayEnd[0]);
    }
    if (Status[30])
    {
        sprintf (BaseStrings[30], "%s%s%s", SubArrayBeg[1], Site_id[30], SubArrayEnd[1]);
    }
    if (Status[40])
    {
        sprintf (BaseStrings[40], "%s%s%s%s%s", SubArrayBeg[2],
                 Site_id2[40],
                 SubArrayMiddl[2],
                 Num_results[40],
                 SubArrayEnd[2]);
    }

    /* Assemble the request path: each "name" fragment is followed by its
       value only when the site actually defines that name. */
    for (i = 0; i < NUM_SITES; i++)
    {
        if (!Status[i])
            continue;

        const int no_id2 = !strcmp (Site_id2[i], "NULL");

        sprintf (Commands[i], "%s%s%s%s%s%s%s%s%s%s%s%s%s",
                 BaseStrings[i],
                 KeywordNames[i], argv[1],
                 IpNames[i], value_if_named (IpNames[i], argv[2]),
                 Site_idNames[i], value_if_named (Site_idNames[i], Site_id[i]),
                 Num_resultsNames[i], value_if_named (Num_resultsNames[i], Num_results[i]),
                 (no_id2 ? "" : Site_id2Names[i]),
                 (no_id2 ? "" : value_if_named (Site_id2Names[i], Site_id2[i])),
                 Sub_idNames[i], value_if_named (Sub_idNames[i], argv[3]));
    }

#ifdef WRITE
    for (i = 0; i < NUM_SITES; i++)
        if (Status[i])
            printf ("%s%s\n", hostnames[i], Commands[i]);
#endif

    for (i = 0; i < NUM_SITES; i++)
    {
        if (!Status[i])
            continue;

        int rc = pthread_create (&thread[indexes[i]], NULL,
                                 work_funct_thread, (void *) &indexes[i]);
        if (rc != 0)
            fprintf (stderr, "pthread_create(): %s\n", strerror (rc));
        else
            started[i] = 1;
    }

    /* Join only threads that really exist; joining an uninitialised
       pthread_t is undefined. */
    for (i = 0; i < NUM_SITES; i++)
        if (started[i])
            pthread_join (thread[indexes[i]], NULL);

    /*printf("Main thread done.\n");*/
    pthread_mutex_destroy (&flag);
    printf ("\n");
    return 0;
}
/* Closes a socket with the call appropriate for the platform. */
static void wf_close_socket (int sock)
{
#ifdef _WIN32
    closesocket (sock);
#else
    close (sock);
#endif
}

/* Non-zero when the named site is parsed with RasborHTML instead of RasborXML. */
static int wf_is_html_site (const char *site)
{
    static const char *const html_sites[] = {
        "Google", "Yahoo", "AltaVista", "Dmoz", "SearchHawk",
        "MrWordSmith", "MSN", "Espotting", "SearchDrifter"
    };
    size_t k;

    for (k = 0; k < sizeof (html_sites) / sizeof (html_sites[0]); k++)
        if (!strcmp (site, html_sites[k]))
            return 1;
    return 0;
}

/*
 * Thread body: downloads Commands[j] from hostnames[j] over HTTP/1.0 and
 * hands the response to RasborHTML or RasborXML, where j is the int that
 * i points to.
 *
 * The whole fetch is repeated while the parser reports a non-zero result,
 * giving up after NUM_PF parse failures or NUM_NoT "tag names not found"
 * results. Every exit path closes the socket (and, on Windows, balances
 * WSAStartup with WSACleanup).
 */
void work_funct (void *i)
{
    /* NOTE(review): alarm() is process-wide, not per-thread. Every worker
       re-arms the same timer, and unless a SIGALRM handler is installed
       elsewhere the default action terminates the whole process. */
    alarm (TIME_OUT);

    const int j = *(int *) i;
    printf ("Process %d started.\n", j);

    int sock = -1;
    struct sockaddr_in host_addr;
    char command[1024];
    char buf[1024];
    char bigbuf[204800];
    const char *host = hostnames[j];
    const char *file = Commands[j];
    int kol_parse_fail = 0, kol_names_of_tegs = 0, n = 0;
    /*
    struct timespec tm1, tm2;
    clock_gettime (CLOCK_REALTIME, &tm1);
    */

#ifdef _WIN32
    /* Initialised once per thread so that the single WSACleanup below
       balances it. */
    WSADATA wsaData;
    if (WSAStartup (MAKEWORD(1, 1), &wsaData) != 0)
    {
        fprintf (stderr, "WSAStartup(): Kann Winsock nicht initialisieren.\n");
        return;
    }
#endif

    /* The target address does not change between retries; resolve it once. */
    memset (&host_addr, 0, sizeof (host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons (HTTP_PORT);
    host_addr.sin_addr.s_addr = inet_addr (host);
    if (host_addr.sin_addr.s_addr == INADDR_NONE)
    {
        /* NOTE(review): gethostbyname() returns static storage and is not
           safe to call from several threads at once. */
        struct hostent *hostinfo = gethostbyname (host);
        if (hostinfo == NULL)
        {
            perror ("gethostbyname()");
            goto cleanup;
        }
        if (hostinfo->h_length < (int) sizeof (host_addr.sin_addr.s_addr))
        {
            fprintf (stderr, "gethostbyname(): unexpected address length.\n");
            goto cleanup;
        }
        memcpy (&host_addr.sin_addr.s_addr, hostinfo->h_addr,
                sizeof (host_addr.sin_addr.s_addr));
    }

    do
    {
        size_t bigbuf_index = 0;
        bigbuf[0] = 0;

        /* Connect, allowing NUM_CONNECTIONS failed attempts before giving
           up. A fresh socket is used for every attempt because the state of
           a socket after a failed connect() is unspecified, and the loop
           stops on the first success. */
        n = 0;
        for (;;)
        {
            sock = socket (AF_INET, SOCK_STREAM, 0);
            if (sock == -1)
            {
                perror ("socket()");
                goto cleanup;
            }

            printf ("Before connect\n");
            if (connect (sock, (struct sockaddr *) &host_addr, sizeof (host_addr)) != -1)
                break;

            /* printf may overwrite errno, so remember it for perror. */
            const int connect_errno = errno;
            printf ("After connect\n");
            if (n == NUM_CONNECTIONS)
            {
                errno = connect_errno;
                perror ("connect()");
                goto cleanup;
            }
            printf ("After connect\n");
            n++;
            wf_close_socket (sock);
            sock = -1;
        }

        /* Build the request, refusing to overflow the fixed-size buffer. */
        const int len = snprintf (command, sizeof (command),
                                  "GET %s HTTP/1.0\r\nHost: %s\r\nUser-agent: compatible\r\n\r\n",
                                  file, host);
        if (len < 0 || (size_t) len >= sizeof (command))
        {
            fprintf (stderr, "Request for site %s does not fit into %lu bytes.\n",
                     SiteIdentificators[j], (unsigned long) sizeof (command));
            goto cleanup;
        }

        /* send() may accept only part of the data; loop until all is sent. */
        int sent = 0;
        while (sent < len)
        {
            const int rc = (int) send (sock, command + sent, (size_t) (len - sent), 0);
            if (rc <= 0)
            {
                perror ("send()");
                goto cleanup;
            }
            sent += rc;
        }

        /* Read the whole response. The count is kept signed so that a
           recv() error (-1) ends the loop instead of looking like a huge
           byte count. */
        int bytes_recv;
        while ((bytes_recv = (int) recv (sock, buf, sizeof (buf), 0)) > 0)
        {
            /* ">=" keeps one byte free for the terminating NUL. */
            if (bigbuf_index + (size_t) bytes_recv >= sizeof (bigbuf))
            {
                fprintf (stderr, "Response from site %s exceeds %lu bytes.\n",
                         SiteIdentificators[j], (unsigned long) sizeof (bigbuf));
                goto cleanup;
            }
            memcpy (bigbuf + bigbuf_index, buf, (size_t) bytes_recv);
            bigbuf_index += (size_t) bytes_recv;
        }
        if (bytes_recv == -1)
        {
            perror ("recv()");
            goto cleanup;
        }
        bigbuf[bigbuf_index] = 0;

        /* The connection is finished; release it before parsing so that a
           retry of the outer loop does not leak a descriptor. */
        wf_close_socket (sock);
        sock = -1;

        /* Parsing starts at the first '<', except for Espotting, which is
           parsed from the very beginning of the response. */
        char *s1 = strstr (bigbuf, "<");
        if (!strcmp (SiteIdentificators[j], "Espotting"))
            s1 = bigbuf;
        if (!s1)
        {
            fprintf (stderr, "Error\n");
            goto cleanup;
        }

        if (wf_is_html_site (SiteIdentificators[j]))
        {
            n = RasborHTML (teg1[j], MAX_NUM_TEG, s1, j);
            if (n != 0)
                goto cleanup;
        }
        else
            n = RasborXML (teg1[j], MAX_NUM_TEG, s1, j);

        if (n == PARSE_FAIL)
        {
            kol_parse_fail++;
            if (kol_parse_fail == NUM_PF)
            {
                fprintf (stderr, "Failed to parse... Site %s.\n", SiteIdentificators[j]);
                goto cleanup;
            }
        }
        if (n == NAMES_OF_TEGS_IS_NOT_FOUND)
        {
            kol_names_of_tegs++;
            if (kol_names_of_tegs == NUM_NoT)
            {
#ifdef WRITE
                fprintf (stderr, "Names of tegs are not found. Site %s.\n", SiteIdentificators[j]);
#endif
                goto cleanup;
            }
        }
    }
    while (n != 0);

cleanup:
    if (sock != -1)
        wf_close_socket (sock);
#ifdef _WIN32
    WSACleanup ();
#endif
    /*printf("Process %d done.\n",j);*/
    return;
}
Программа выводит:
Process 1 was started.
Process 2 was started.
Before connect.
Killed
Если нужен полный исходный файл, могу выслать архив по электронной почте. Напишите свой адрес на jarunda@mail.ru.
Спасибо.