牛骨文教育服务平台(让学习变的简单)
博文笔记

php7.0对于敏感词扩展库的兼容

创建时间:2017-03-16 投稿人: 浏览次数:1671

在 PHP 7.0 及以后的版本中,有些函数被废弃,有些函数的参数发生了变化,导致原有的基于 PHP 5.3 的敏感词扩展库无法编译通过。因为项目中需要用到敏感词过滤,这几天恶补了一下 PHP 7.0 的源码,自己动手修改了敏感词扩展库,目前已验证通过,现在记录下来。

1、php_trie_filter_dtor

static void php_trie_filter_dtor(zend_rsrc_list_entry *rsrc TSRMLS_DC){ Trie *trie = (Trie *)rsrc->ptr; trie_free(trie); } 改成了 static void php_trie_filter_dtor(zend_resource *rsrc TSRMLS_DC) { Trie *trie = (Trie *)rsrc->ptr; trie_free(trie); } 我们发现该函数的参数类型是 zend_resource 。这是 PHP7 新增的数据结构,在 PHP 5 则是 zend_rsrc_list_entry

2、trie_filter_load

PHP_FUNCTION(trie_filter_load) { Trie *trie; char *path; int path_len; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &path, &path_len) == FAILURE) { RETURN_NULL(); } trie = trie_new_from_file(path); if (!trie) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to load %s", path); RETURN_NULL(); } ZEND_REGISTER_RESOURCE(return_value, trie, le_trie_filter); } 改成了 PHP_FUNCTION(trie_filter_load) { Trie *trie; char *path; int path_len; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &path, &path_len) == FAILURE) { RETURN_NULL(); } trie = trie_new_from_file(path); if (!trie) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to load %s", path); RETURN_NULL(); } RETURN_RES(zend_register_resource(trie, le_trie_filter)); } 在 PHP 7 中删除了原来的 ZEND_REGISTER_RESOURCE 宏,直接使用 zend_register_resource 函数
ZEND_API zend_resource* zend_register_resource(void *rsrc_pointer, int rsrc_type)
参数 解释
rsrc_pointer 资源数据指针
rsrc_type 注册资源类型时获得的资源类型 id
3、trie_search_all static int trie_search_all(Trie *trie, const AlphaChar *text, zval *data) { TrieState *s; const AlphaChar *p; const AlphaChar *base; zval *word = NULL; base = text; if (! (s = trie_root(trie))) { return -1; } while (*text) { p = text; if(! trie_state_is_walkable(s, *p)) { trie_state_rewind(s); text++; continue; } while(*p && trie_state_is_walkable(s, *p) && ! trie_state_is_leaf(s)) { trie_state_walk(s, *p++); if (trie_state_is_terminal(s)) { MAKE_STD_ZVAL(word); array_init_size(word, 3); add_next_index_long(word, text - base); add_next_index_long(word, p - text); add_next_index_zval(data, word); } } trie_state_rewind(s); text++; } trie_state_free(s); return 0; } 改成了 static int trie_search_all(Trie *trie, const AlphaChar *text, zval *data) { TrieState *s; const AlphaChar *p; const AlphaChar *base; zval word; base = text; if (! (s = trie_root(trie))) { return -1; } while (*text) { p = text; if(! trie_state_is_walkable(s, *p)) { trie_state_rewind(s); text++; continue; } while(*p && trie_state_is_walkable(s, *p) && ! 
trie_state_is_leaf(s)) { trie_state_walk(s, *p++); if (trie_state_is_terminal(s)) { array_init_size(&word, 3); add_next_index_long(&word, text - base); add_next_index_long(&word, p - text); add_next_index_zval(data, &word); } } trie_state_rewind(s); text++; } trie_state_free(s); return 0; } 这里几个函数暂时未搞明白什么意思 4、trie_filter_search PHP_FUNCTION(trie_filter_search) { Trie *trie; zval *trie_resource; unsigned char *text; int text_len; int offset = -1, i, ret; TrieData length = 0; AlphaChar *alpha_text; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rs", &trie_resource, &text, &text_len) == FAILURE) { RETURN_FALSE; } array_init(return_value); if (text_len < 1 || strlen(text) != text_len) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "input is empty"); return; } ZEND_FETCH_RESOURCE(trie, Trie *, &trie_resource, -1, PHP_TRIE_FILTER_RES_NAME, le_trie_filter); alpha_text = emalloc(sizeof(AlphaChar) * text_len + 1); for (i = 0; i < text_len; i++) { alpha_text[i] = (AlphaChar) text[i]; } alpha_text[text_len] = TRIE_CHAR_TERM; ret = trie_search_one(trie, alpha_text, &offset, &length); efree(alpha_text); if (ret == 0) { return; } else if (ret == 1) { add_next_index_long(return_value, offset); add_next_index_long(return_value, length); } else { RETURN_FALSE; } } 改成了 PHP_FUNCTION(trie_filter_search) { Trie *trie; zval *trie_resource; unsigned char *text; int text_len; int offset = -1, i, ret; TrieData length = 0; AlphaChar *alpha_text; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rs", &trie_resource, &text, &text_len) == FAILURE) { RETURN_FALSE; } array_init(return_value); if (text_len < 1 || strlen(text) != text_len) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "input is empty"); return; } trie = (Trie *)zend_fetch_resource(Z_RES_P(trie_resource), PHP_TRIE_FILTER_RES_NAME, le_trie_filter); alpha_text = emalloc(sizeof(AlphaChar) * (text_len + 1)); for (i = 0; i < text_len; i++) { alpha_text[i] = (AlphaChar) text[i]; } alpha_text[text_len] = TRIE_CHAR_TERM; ret = 
trie_search_one(trie, alpha_text, &offset, &length); efree(alpha_text); if (ret == 0) { return; } else if (ret == 1) { add_next_index_long(return_value, offset); add_next_index_long(return_value, length); } else { RETURN_FALSE; } } 使用资源
ZEND_API void *zend_fetch_resource(zend_resource *res, const char *resource_type_name, int resource_type)
在 PHP 7 中删除了原有的 ZEND_FETCH_RESOURCE 宏,直接使用函数 zend_fetch_resource,而且解析方式也变得简单了很多,相比 PHP 5 要高效很多,后面我们再通过图片分析对比。
参数 含义
res 资源指针
resource_type_name 该类资源的字符串别名
resource_type 该类资源的类型 id
5、Trie_filter_search_all PHP_FUNCTION(trie_filter_search_all) { Trie *trie; zval *trie_resource; unsigned char *text; int text_len; int i, ret; AlphaChar *alpha_text; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rs", &trie_resource, &text, &text_len) == FAILURE) { RETURN_FALSE; } array_init(return_value); if (text_len < 1 || strlen(text) != text_len) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "input is empty"); return; } ZEND_FETCH_RESOURCE(trie, Trie *, &trie_resource, -1, PHP_TRIE_FILTER_RES_NAME, le_trie_filter); alpha_text = emalloc(sizeof(AlphaChar) * text_len + 1); for (i = 0; i < text_len; i++) { alpha_text[i] = (AlphaChar) text[i]; } alpha_text[text_len] = TRIE_CHAR_TERM; ret = trie_search_all(trie, alpha_text, return_value); efree(alpha_text); if (ret == 0) { return; } else { RETURN_FALSE; } } 改成了 PHP_FUNCTION(trie_filter_search_all) { Trie *trie; zval *trie_resource; unsigned char *text; int text_len; int i, ret; AlphaChar *alpha_text; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rs", &trie_resource, &text, &text_len) == FAILURE) { RETURN_FALSE; } array_init(return_value); if (text_len < 1 || strlen(text) != text_len) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "input is empty"); return; } trie = (Trie *)zend_fetch_resource(Z_RES_P(trie_resource), PHP_TRIE_FILTER_RES_NAME, le_trie_filter); alpha_text = emalloc(sizeof(AlphaChar) * (text_len + 1)); for (i = 0; i < text_len; i++) { alpha_text[i] = (AlphaChar) text[i]; } alpha_text[text_len] = TRIE_CHAR_TERM; ret = trie_search_all(trie, alpha_text, return_value); efree(alpha_text); if (ret == 0) { return; } else { RETURN_FALSE; } } 6、Trie_filter_new PHP_FUNCTION(trie_filter_new) { Trie *trie; AlphaMap *alpha_map; int ret; alpha_map = alpha_map_new(); if (! alpha_map) { RETURN_NULL(); } if (alpha_map_add_range(alpha_map, 0x00, 0xff) != 0) { /* treat all strings as byte stream */ alpha_map_free(alpha_map); RETURN_NULL(); } trie = trie_new(alpha_map); alpha_map_free(alpha_map); if (! 
trie) { RETURN_NULL(); } ZEND_REGISTER_RESOURCE(return_value, trie, le_trie_filter); } 改成了 PHP_FUNCTION(trie_filter_new) { Trie *trie; AlphaMap *alpha_map; int ret; alpha_map = alpha_map_new(); if (! alpha_map) { RETURN_NULL(); } if (alpha_map_add_range(alpha_map, 0x00, 0xff) != 0) { /* treat all strings as byte stream */ alpha_map_free(alpha_map); RETURN_NULL(); } trie = trie_new(alpha_map); alpha_map_free(alpha_map); if (! trie) { RETURN_NULL(); } RETURN_RES(zend_register_resource(trie, le_trie_filter)); } 资源的注册 在 PHP 7 中删除了原来的 ZEND_REGISTER_RESOURCE 宏,直接使用 zend_register_resource 函数
ZEND_API zend_resource* zend_register_resource(void *rsrc_pointer, int rsrc_type)
参数 解释
rsrc_pointer 资源数据指针
rsrc_type 注册资源类型时获得的资源类型 id
7、Trie_filter_store PHP_FUNCTION(trie_filter_store) { Trie *trie; zval *trie_resource; unsigned char *keyword, *p; int keyword_len, i; AlphaChar alpha_key[KEYWORD_MAX_LEN+1]; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rs", &trie_resource, &keyword, &keyword_len) == FAILURE) { RETURN_FALSE; } if (keyword_len > KEYWORD_MAX_LEN || keyword_len < 1) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "keyword should has [1, %d] bytes", KEYWORD_MAX_LEN); RETURN_FALSE; } ZEND_FETCH_RESOURCE(trie, Trie *, &trie_resource, -1, PHP_TRIE_FILTER_RES_NAME, le_trie_filter); p = keyword; i = 0; while (*p && *p != " " && *p != " ") { alpha_key[i++] = (AlphaChar)*p; p++; } alpha_key[i] = TRIE_CHAR_TERM; if (! trie_store(trie, alpha_key, -1)) { RETURN_FALSE; } RETURN_TRUE; } 改成了 PHP_FUNCTION(trie_filter_store) { Trie *trie; zval *trie_resource; unsigned char *keyword, *p; int keyword_len, i; AlphaChar alpha_key[KEYWORD_MAX_LEN+1]; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rs", &trie_resource,&keyword, &keyword_len) == FAILURE) { RETURN_FALSE; } if (keyword_len > KEYWORD_MAX_LEN || keyword_len < 1) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "keyword should has [1, %d] bytes", KEYWORD_MAX_LEN); RETURN_FALSE; } trie = (Trie *)zend_fetch_resource(Z_RES_P(trie_resource), PHP_TRIE_FILTER_RES_NAME, le_trie_filter); p = keyword; i = 0; while (*p && *p != " " && *p != " ") { alpha_key[i++] = (AlphaChar)*p; p++; } alpha_key[i] = TRIE_CHAR_TERM; if (! 
trie_store(trie, alpha_key, -1)) { RETURN_FALSE; } RETURN_TRUE; } 8、Trie_filter_save PHP_FUNCTION(trie_filter_save) { Trie *trie; zval *trie_resource; unsigned char *filename; int filename_len; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rs", &trie_resource, &filename, &filename_len) == FAILURE) { RETURN_FALSE; } if (filename_len < 1 || strlen(filename) != filename_len) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "save path required"); RETURN_FALSE; } ZEND_FETCH_RESOURCE(trie, Trie *, &trie_resource, -1, PHP_TRIE_FILTER_RES_NAME, le_trie_filter); if (trie_save(trie, filename)) { RETURN_FALSE; } RETURN_TRUE; } 改成 PHP_FUNCTION(trie_filter_save) { Trie *trie; zval *trie_resource; unsigned char *filename; int filename_len; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rs",&trie_resource, &filename, &filename_len) == FAILURE) { RETURN_FALSE; } if (filename_len < 1 || strlen(filename) != filename_len) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "save path required"); RETURN_FALSE; } trie = (Trie *)zend_fetch_resource(Z_RES_P(trie_resource), PHP_TRIE_FILTER_RES_NAME, le_trie_filter); if (trie_save(trie, filename)) { RETURN_FALSE; } RETURN_TRUE; }
9、Trie_filter_free PHP_FUNCTION(trie_filter_free) { Trie *trie; zval *trie_resource; int resource_id; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "r", &trie_resource) == FAILURE) { RETURN_FALSE; } ZEND_FETCH_RESOURCE(trie, Trie *, &trie_resource, -1, PHP_TRIE_FILTER_RES_NAME, le_trie_filter); resource_id = Z_RESVAL_P(trie_resource); if (zend_list_delete(resource_id) == SUCCESS) { RETURN_TRUE; } RETURN_FALSE; } 改成 PHP_FUNCTION(trie_filter_free) { Trie *trie; zval *trie_resource; int resource_id;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "r", &trie_resource) == FAILURE) { RETURN_FALSE; } trie = (Trie *)zend_fetch_resource(Z_RES_P(trie_resource), PHP_TRIE_FILTER_RES_NAME, le_trie_filter); resource_id = Z_RES_P(trie_resource); if (zend_list_delete(resource_id) == SUCCESS) { RETURN_TRUE; } RETURN_FALSE; }









声明:该文观点仅代表作者本人,牛骨文系教育信息发布平台,牛骨文仅提供信息存储空间服务。