PHP 依据姓名猜测性别

 
更多

<?php
/**
 * Gender Guesser
 *
 * This class guesses the gender of Chinese names.
 *
 * Blog Entries: http://blog.wudilabs.org/tag/genderguesser/
 * PHP Classes: http://www.phpclasses.org/browse/package/2701.html
 *
 * PHP versions 5
 *
 * LICENSE: This program is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * @author      Wudi <wudi@wudilabs.org>
 * @copyright   2005-2014 Wudi Labs
 * @license     http://www.gnu.org/licenses/gpl-2.0.html  GPL v2
 * @version     v0.10.0
 * @link        http://blog.wudilabs.org/tag/genderguesser/
 */

/**
 * Gender Guesser
 *
 * @author      Wudi <wudi@wudilabs.org>
 * @copyright   2005-2014 Wudi Labs
 * @license     http://www.gnu.org/licenses/gpl-2.0.html  GPL v2
 */
class GenderGuesser {
    /**
     * Weight applied to the first character of a two-character given name.
     * The second character is weighted by (1 - FIRST_CHAR_WEIGHT).
     */
    const FIRST_CHAR_WEIGHT = 0.45;

    /**
     * Male probability assumed for a character that is not in the lexicon.
     */
    const UNKNOWN_CHAR_PROBABILITY = 0.5;

    /** @var array Option flags keyed by a single character; 's' = name includes a surname. */
    private $_options = array('s' => true);

    /** @var string Comment text taken from the loaded lexicon. */
    private $_lexicon_comment = '';

    /** @var array Map of character => male probability from the loaded lexicon. */
    private $_lexicon_chars = array();

    // {{{ constructor

    /**
     * Constructor.
     *
     * Both arguments are optional; when given, they are forwarded to
     * setOptions() and loadLexicon() respectively. Their return values are
     * not inspected here, so a failed load simply leaves the lexicon empty.
     *
     * @param mixed  $options      Options (see setOptions())
     * @param string $lexicon_path Path of the lexicon file
     */
    public function __construct($options = null, $lexicon_path = null) {
        if (!is_null($options)) {
            $this->setOptions($options);
        }

        if (!is_null($lexicon_path)) {
            $this->loadLexicon($lexicon_path);
        }
    }

    // }}}

    // {{{ setOptions()

    /**
     * Set options.
     *
     * $options may be an array of option tokens or a string in which the
     * tokens are separated by whitespace. A token is the option character,
     * optionally prefixed with a minus sign "-" to switch the option off.
     * Available options:
     * s - the name argument includes the surname.
     *
     * Empty tokens are ignored. If any token is malformed (not a string, or
     * a lone "-"), nothing is changed and false is returned.
     *
     * @param mixed $options Options
     *
     * @return bool true on success, false on failure
     */
    public function setOptions($options) {
        if (is_string($options)) {
            // Split on any run of whitespace so repeated or leading/trailing
            // spaces do not produce empty tokens.
            $options = preg_split('/\s+/', $options, -1, PREG_SPLIT_NO_EMPTY);
        }
        if (!is_array($options)) {
            return false;
        }

        // Collect changes first so a malformed token cannot leave the
        // options half-applied.
        $pending = array();
        foreach ($options as $option) {
            if (!is_string($option)) {
                return false;
            }
            if ($option === '') {
                continue;
            }
            if ($option[0] === '-') {
                if (strlen($option) < 2) {
                    return false;
                }
                $pending[$option[1]] = false;
            } else {
                $pending[$option[0]] = true;
            }
        }

        foreach ($pending as $flag => $enabled) {
            $this->_options[$flag] = $enabled;
        }
        return true;
    }

    // }}}

    // {{{ loadLexicon()

    /**
     * Load a lexicon.
     *
     * The file must contain a PHP-serialized array of five elements with
     * 'signature' equal to 'GENDER_GUESSER_LEXICON', 'version' equal to 3
     * and an array under 'chars'. 'comment' is optional. A previously loaded
     * lexicon is left untouched when loading fails.
     *
     * NOTE(review): the file is decoded with unserialize(); only load
     * lexicon files from a trusted location.
     *
     * @param string $path Path of the lexicon file
     *
     * @return bool true on success, false on failure
     */
    public function loadLexicon($path) {
        if (!is_string($path) || !is_file($path)) {
            return false;
        }
        $data_serialized = file_get_contents($path);
        if (($data_serialized === false) || (strncmp($data_serialized, 'a:5:{', 5) !== 0)) {
            return false;
        }

        // A lexicon only holds scalars and arrays, so refuse to instantiate
        // objects where the runtime supports it (PHP 7.0+).
        if (defined('PHP_VERSION_ID') && (PHP_VERSION_ID >= 70000)) {
            $data = unserialize($data_serialized, array('allowed_classes' => false));
        } else {
            $data = unserialize($data_serialized);
        }
        // unserialize() yields false on malformed input.
        if (!is_array($data)) {
            return false;
        }

        if (!isset($data['signature']) || ($data['signature'] !== 'GENDER_GUESSER_LEXICON')) {
            return false;
        }
        // Loose comparison is deliberate: accepts both int 3 and string "3".
        if (!isset($data['version']) || ($data['version'] != 3)) {
            return false;
        }
        if (!isset($data['chars']) || !is_array($data['chars'])) {
            return false;
        }

        $has_comment = isset($data['comment']) && is_scalar($data['comment']);
        $this->_lexicon_comment = $has_comment ? (string) $data['comment'] : '';
        $this->_lexicon_chars = $data['chars'];
        return true;
    }

    // }}}

    // {{{ getLexiconComment()

    /**
     * Get the comment of the loaded lexicon.
     *
     * @return string Comment text of the lexicon ('' if none is loaded)
     */
    public function getLexiconComment() {
        return $this->_lexicon_comment;
    }

    // }}}

    // {{{ getMaleProbability()

    /**
     * Get the probability that the given name is a male name.
     *
     * With option 's' on, the name must be 2 to 4 characters long: the first
     * two characters of a 4-character name, otherwise the first character,
     * are dropped as the surname. With option 's' off, the name must be 1 or
     * 2 characters long and is used as is.
     *
     * A one-character given name yields that character's lexicon value; a
     * two-character given name yields the weighted sum of both characters
     * (FIRST_CHAR_WEIGHT for the first). Characters missing from the lexicon
     * count as UNKNOWN_CHAR_PROBABILITY.
     *
     * @param string $name Name (UTF-8 encoded)
     *
     * @return float|false Probability of being a male name, or false when no
     *                     lexicon is loaded or the name length is unsupported
     */
    public function getMaleProbability($name) {
        if (empty($this->_lexicon_chars) || !is_scalar($name)) {
            return false;
        }
        $name = (string) $name;
        $name_length = mb_strlen($name, 'UTF-8');

        // Validate the length before splitting so oversized input is
        // rejected without walking every character.
        if (!empty($this->_options['s'])) {
            if (($name_length < 2) || ($name_length > 4)) {
                return false;
            }
            $surname_length = ($name_length === 4) ? 2 : 1;
        } else {
            if (($name_length < 1) || ($name_length > 2)) {
                return false;
            }
            $surname_length = 0;
        }

        // What remains after the surname is one or two characters.
        $chars = array();
        for ($i = $surname_length; $i < $name_length; $i++) {
            $chars[] = mb_substr($name, $i, 1, 'UTF-8');
        }

        $first = $this->_getCharProbability($chars[0]);
        if (count($chars) === 1) {
            return $first;
        }
        $second = $this->_getCharProbability($chars[1]);

        return $first * self::FIRST_CHAR_WEIGHT
            + $second * (1 - self::FIRST_CHAR_WEIGHT);
    }

    // }}}

    // {{{ _getCharProbability()

    /**
     * Look up the male probability of a single character.
     *
     * @param string $char One UTF-8 character
     *
     * @return float Lexicon value, or UNKNOWN_CHAR_PROBABILITY if absent
     */
    private function _getCharProbability($char) {
        if (array_key_exists($char, $this->_lexicon_chars)) {
            return (float) $this->_lexicon_chars[$char];
        }
        return self::UNKNOWN_CHAR_PROBABILITY;
    }

    // }}}
}

?>
打赏

本文固定链接: https://www.cxy163.net/archives/1301 | 绝缘体

该日志由 绝缘体.. 于 2016年09月08日 发表在 首页 分类下,
原创文章转载请注明: php 依据姓名猜测性别 | 绝缘体

抱歉!评论已关闭.