_date[0].rm_so), -1);
int n_size = strlen(s_phone);
(*phone) = (char*) malloc(n_size + 1);
memset((*phone), 0, n_size);
strcpy((*phone), s_phone);
free(s_phone);
return 0;
}
/**
* @brief 简体中文文本网页时间提取
* 正则表达式说明如下
* [0-9]{2,4}(-|/|年) //年份
* [0-9]{1,2}(-|/|月) //月份
* [0-9]{1,2} //日
* [0-9]{1,2} //小时
* :[0-9]{1,2} //分钟
* (:[0-9]{1,2}) //秒
* @demo
* char* datetime = NULL;
* datetime_extract("2011年3月7日 14:03", &datetime);
* printf("format datetime = [%s]\n", datetime);
* free(datetime);
* @param str 待处理的字符串
* @param date [output] 提取出的日期时间子串,
* 其格式为“yyyy-MM-dd hh:mm:ss”的字符串(要注意在外面释放其堆内存)
* @return 返回0表示成功,否则出错。
*/
int datetime_extract(const char* str, char** date)
{
char* datetime = (char*) malloc(20);
memset(datetime, 0, 20);
*date = datetime;
int year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0;
char* s_num = NULL;
const char* pattern_date = "[0-9]{2,4}(-|/|年)[0-9]{1,2}(-|/|月)[0-9]{1,2}";
const char* pattern_time = "[0-9]{1,2}:[0-9]{1,2}(:[0-9]{1,2}) ";
int z = 0;
regex_t reg_date, reg_time;
regmatch_t pm_date[1], pm_time[1];
// 提取日期
regcomp(®_date, pattern_date, REG_EXTENDED);
z = regexec(®_date, str, 1, pm_date, 0);
regfree(®_date);
if(0 != z)
{
fprintf(stderr, " invalid date format: [%s]\n", str);
strcpy(datetime, "2000-01-01 00:00:00");
return -1;
}
char* s_date = NULL;
return_if_fail(s_date = substr(str,
pm_date[0].rm_so,
pm_date[0].rm_eo - pm_date[0].rm_so), -1);
int n_offset = 1;
int date_l = 0;
int date_r = 0;
date_l = find(s_date, "-");
date_r = rfind(s_date, "-");
if(-1 == date_l)
{
date_l = find(s_date, "/");
}
if(-1 == date_r || date_r == date_l)
{
date_r = rfind(s_date, "/");
}
if(-1 == date_l)
{
date_l = find(s_date, "年");
n_offset = strlen("年");
}
if(-1 == date_r || date_r == date_l)
{
date_r = find(s_date, "月");
n_offset = strlen("月");
}
return_if_fail(s_num = substr(s_date, 0, date_l - 0), -1);
year = atoi(s_num); free(s_num);
if(year < 100) { year += 2000; };
return_if_fail(s_num = substr(s_date, \
date_l + n_offset, date_r - date_l - n_offset), -1);
month = atoi(s_num); free(s_num);
return_if_fail(s_num = substr(s_date,
date_r + n_offset, strlen(s_date) - date_r - n_offset), -1);
day = atoi(s_num); free(s_num);
free(s_date);
// 提取时间
regcomp(®_time, pattern_time, REG_EXTENDED);
z = regexec(®_time, str, 1, pm_time, 0);
regfree(®_time);
if(0 != z)
{
fprintf(stderr, " invalid time format: [%s]\n", str);
hour = 0;
minute = 0;
second = 0;
sprintf(datetime, "%04d-%02d-%02d %02d:%02d:%02d",
year, month, day, hour, minute, second);
return 0;
}
char* s_time = NULL;
return_if_fail(s_time = substr(str, \
pm_time[0].rm_so, pm_time[0].rm_eo - pm_time[0].rm_so), -1);
int time_l = find(s_time, ":");
int time_r = rfind(s_time, ":");
return_if_fail(s_num = su