{"id":202216,"date":"2024-05-10T03:12:28","date_gmt":"2024-05-09T19:12:28","guid":{"rendered":"https:\/\/docs.pingcode.com\/ask\/ask-ask\/202216.html"},"modified":"2024-05-10T03:12:35","modified_gmt":"2024-05-09T19:12:35","slug":"%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e6%96%87%e7%ab%a0%e5%85%b3%e9%94%ae%e8%af%8d%e6%8f%90%e5%8f%96%e7%9a%84%e7%ae%97%e6%b3%95%e6%9c%89%e5%93%aa%e4%ba%9b","status":"publish","type":"post","link":"https:\/\/docs.pingcode.com\/ask\/202216.html","title":{"rendered":"\u81ea\u7136\u8bed\u8a00\u5904\u7406\u6587\u7ae0\u5173\u952e\u8bcd\u63d0\u53d6\u7684\u7b97\u6cd5\u6709\u54ea\u4e9b"},"content":{"rendered":"<p style=\"text-align:center\"><img decoding=\"async\" src=\"https:\/\/cdn-kb.worktile.com\/kb\/wp-content\/uploads\/2024\/04\/24141920\/9e8c3533-5b01-4604-98ec-e76e99c76886.webp\" alt=\"\u81ea\u7136\u8bed\u8a00\u5904\u7406\u6587\u7ae0\u5173\u952e\u8bcd\u63d0\u53d6\u7684\u7b97\u6cd5\u6709\u54ea\u4e9b\" \/><\/p>\n<p><p>\u81ea\u7136\u8bed\u8a00\u5904\u7406\uff08Natural Language Processing\uff0cNLP\uff09\u9886\u57df\u4e2d\uff0c\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\u662f\u8bc6\u522b\u548c\u63d0\u53d6\u6587\u672c\u6570\u636e\u4e2d\u6700\u91cd\u8981\u7684\u5355\u8bcd\u6216\u77ed\u8bed\u7684\u6280\u672f\u3002\u8fd9\u4e9b\u7b97\u6cd5\u80fd\u591f\u5e2e\u52a9\u7406\u89e3\u6587\u672c\u7684\u4e3b\u8981\u5185\u5bb9\u548c\u4e3b\u9898\u3002<strong>\u5173\u952e\u8bcd\u63d0\u53d6\u7684\u4e3b\u8981\u7b97\u6cd5\u5305\u62ec\uff1aTF-IDF\u3001TextRank\u3001LDA\u3001\u4ee5\u53ca\u57fa\u4e8e\u6df1\u5ea6\u5b66\u4e60\u7684\u65b9\u6cd5<\/strong>\u3002\u5728\u8fd9\u4e9b\u7b97\u6cd5\u4e2d\uff0c<strong>TF-IDF<\/strong> \u65b9\u6cd5\u56e0\u5176\u7b80\u6d01\u6027\u548c\u9ad8\u6548\u6027\u800c\u5e7f\u6cdb\u5e94\u7528\u4e8e\u81ea\u7136\u8bed\u8a00\u5904\u7406\u7684\u5404\u4e2a\u65b9\u9762\u3002<\/p>\n<\/p>\n<p><h3>\u4e00\u3001TF-IDF<\/h3>\n<\/p>\n<p><p>TF-IDF\uff08Term Frequency-Inverse Document Frequency\uff09\u662f\u4e00\u79cd\u7528\u4e8e\u4fe1\u606f\u68c0\u7d22\u4e0e\u6587\u672c\u6316\u6398\u7684\u5e38\u7528\u52a0\u6743\u6280\u672f\u3002TF\u8868\u793a\u8bcd\u9891\uff0c\u5373\u4e00\u4e2a\u8bcd\u5728\u6587\u6863\u4e2d\u7684\u51fa\u73b0\u9891\u7387\uff1bIDF\u8868\u793a\u9006\u6587\u6863\u9891\u7387\uff0c\u7528\u6765\u51cf\u5c11\u6240\u6709\u6587\u6863\u4e2d\u5171\u6709\u8bcd\u7684\u5f71\u54cd\u3002<\/p>\n<\/p>\n<ul>\n<li><strong>\u6838\u5fc3\u601d\u60f3<\/strong>\uff1a\u4e00\u4e2a\u8bcd\u8bed\u5728\u4e00\u7bc7\u6587\u7ae0\u4e2d\u51fa\u73b0\u9891\u7387\u9ad8\uff08TF\u9ad8\uff09\uff0c\u5e76\u4e14\u5728\u5176\u4ed6\u6587\u7ae0\u4e2d\u51fa\u73b0\u9891\u7387\u4f4e\uff08IDF\u9ad8\uff09\uff0c\u5219\u8ba4\u4e3a\u8fd9\u4e2a\u8bcd\u8bed\u5177\u6709\u5f88\u597d\u7684\u7c7b\u522b\u533a\u5206\u80fd\u529b\uff0c\u5bf9\u6587\u672c\u5185\u5bb9\u7684\u8868\u8ff0\u4f5c\u7528\u5927\u3002<\/li>\n<li><strong>\u5e94\u7528<\/strong>\uff1aTF-IDF\u5e7f\u6cdb\u5e94\u7528\u4e8e\u6587\u672c\u76f8\u5173\u6027\u8bc4\u4f30\u3001\u6587\u6863\u5185\u5bb9\u6458\u8981\u751f\u6210\u548c\u5173\u952e\u8bcd\u63d0\u53d6\u7b49\u9886\u57df\u3002\u5176\u7b97\u6cd5\u7684\u7b80\u6d01\u6027\u548c\u6709\u6548\u6027\uff0c\u4f7f\u5176\u6210\u4e3a\u6700\u53d7\u6b22\u8fce\u7684\u5173\u952e\u8bcd\u63d0\u53d6\u65b9\u6cd5\u4e4b\u4e00\u3002<\/li>\n<\/ul>\n<p><h3>\u4e8c\u3001TEXTRANK<\/h3>\n<\/p>\n<p><p>TextRank\u662f\u4e00\u79cd\u57fa\u4e8e\u56fe\u7684\u6392\u5e8f\u7b97\u6cd5\uff0c\u7528\u4e8e\u81ea\u7136\u8bed\u8a00\u6587\u672c\u5904\u7406\uff0c\u7075\u611f\u6765\u6e90\u4e8ePageRank\u7b97\u6cd5\u3002\u5b83\u901a\u8fc7\u6784\u5efa\u4e00\u4e2a\u6587\u672c\u4e2d\u7684\u5355\u8bcd\u6216\u77ed\u8bed\u7ec4\u6210\u7684\u56fe\u6a21\u578b\uff0c\u5229\u7528\u56fe\u8bba\u4e2d\u7684\u6392\u540d\u7b97\u6cd5\u6765\u8bc6\u522b\u6587\u672c\u7684\u5173\u952e\u90e8\u5206\u3002<\/p>\n<\/p>\n<ul>\n<li><strong>\u6784\u5efa\u56fe\u6a21\u578b<\/strong>\uff1a\u5728TextRank\u7b97\u6cd5\u4e2d\uff0c\u6587\u672c\u4e2d\u7684\u6bcf\u4e2a\u5355\u8bcd\u6216\u77ed\u8bed\u88ab\u5f53\u4f5c\u56fe\u4e2d\u7684\u4e00\u4e2a\u8282\u70b9\uff0c\u5982\u679c\u4e24\u4e2a\u8282\u70b9\u4e4b\u95f4\u5b58\u5728\u4e00\u5b9a\u7684\u5171\u73b0\u5173\u7cfb\uff0c\u5219\u5728\u8fd9\u4e24\u4e2a\u8282\u70b9\u4e4b\u95f4\u5efa\u7acb\u4e00\u4e2a\u65e0\u5411\u7684\u8fb9\u3002<\/li>\n<li><strong>\u7b97\u6cd5\u7279\u70b9<\/strong>\uff1aTextRank\u4e0d\u4f9d\u8d56\u4e8e\u4efb\u4f55\u5916\u90e8\u77e5\u8bc6\u5e93\u548c\u8bcd\u6c47\u8868\uff0c\u5b8c\u5168\u57fa\u4e8e\u6587\u672c\u7684\u7edf\u8ba1\u4fe1\u606f\uff0c\u80fd\u591f\u8f83\u597d\u5730\u628a\u63e1\u6587\u672c\u7684\u5173\u952e\u4fe1\u606f\u548c\u7ed3\u6784\u3002\u5b83\u5c24\u5176\u9002\u7528\u4e8e\u6458\u8981\u751f\u6210\u529f\u80fd\u548c\u5173\u952e\u8bcd\u63d0\u53d6\u4efb\u52a1\u3002<\/li>\n<\/ul>\n<p><h3>\u4e09\u3001LDA<\/h3>\n<\/p>\n<p><p>LDA\uff08Latent Dirichlet Allocation\uff09\u662f\u4e00\u79cd\u4e3b\u9898\u6a21\u578b\uff0c\u5b83\u53ef\u4ee5\u5c06\u6587\u6863\u96c6\u4e2d\u6bcf\u7bc7\u6587\u6863\u7684\u4e3b\u9898\u6309\u6982\u7387\u5206\u5e03\u5217\u51fa\uff0c\u8fdb\u800c\u7528\u4e8e\u5173\u952e\u8bcd\u63d0\u53d6\u3002<\/p>\n<\/p>\n<ul>\n<li><strong>\u6a21\u578b\u539f\u7406<\/strong>\uff1aLDA\u5047\u8bbe\u6587\u6863\u662f\u7531\u9690\u542b\u7684\u4e3b\u9898\u6df7\u5408\u800c\u6210\uff0c\u800c\u6bcf\u4e2a\u4e3b\u9898\u5219\u662f\u7531\u4e00\u7ec4\u5173\u952e\u8bcd\u6309\u7167\u6982\u7387\u5206\u5e03\u6784\u6210\u3002\u901a\u8fc7\u8fd9\u79cd\u65b9\u5f0f\u53ef\u4ee5\u5c06\u6587\u6863\u7684\u751f\u6210\u6a21\u62df\u51fa\u6765\u3002<\/li>\n<li><strong>\u5173\u952e\u8bcd\u63d0\u53d6<\/strong>\uff1a\u901a\u8fc7\u5206\u6790\u6587\u6863\u4e2d\u7684\u5355\u8bcd\u5bf9\u5e94\u7684\u4e3b\u9898\u5206\u5e03\uff0c\u53ef\u4ee5\u8bc6\u522b\u51fa\u4e0e\u7279\u5b9a\u4e3b\u9898\u5f3a\u76f8\u5173\u7684\u5173\u952e\u8bcd\uff0c\u4ece\u800c\u5b9e\u73b0\u5173\u952e\u8bcd\u63d0\u53d6\u7684\u76ee\u7684\u3002LDA\u7279\u522b\u9002\u7528\u4e8e\u5927\u89c4\u6a21\u6587\u6863\u96c6\u7684\u4e3b\u9898\u53d1\u73b0\u548c\u5173\u952e\u8bcd\u63d0\u53d6\u3002<\/li>\n<\/ul>\n<p><h3>\u56db\u3001\u57fa\u4e8e\u6df1\u5ea6\u5b66\u4e60\u7684\u65b9\u6cd5<\/h3>\n<\/p>\n<p><p>\u968f\u7740\u6df1\u5ea6\u5b66\u4e60\u6280\u672f\u7684\u53d1\u5c55\uff0c\u57fa\u4e8e\u6df1\u5ea6\u5b66\u4e60\u7684\u5173\u952e\u8bcd\u63d0\u53d6\u65b9\u6cd5\u4e5f\u9010\u6e10\u5174\u8d77\u3002\u8fd9\u4e9b\u65b9\u6cd5\u901a\u5e38\u4f7f\u7528\u9884\u8bad\u7ec3\u8bed\u8a00\u6a21\u578b\u5982BERT\u6216GPT\uff0c\u901a\u8fc7\u5927\u89c4\u6a21\u8bed\u6599\u8bad\u7ec3\uff0c\u5bf9\u6587\u672c\u8fdb\u884c\u8bed\u4e49\u7406\u89e3\u548c\u5173\u952e\u8bcd\u6807\u6ce8\u3002<\/p>\n<\/p>\n<ul>\n<li><strong>\u6280\u672f\u7279\u70b9<\/strong>\uff1a\u6df1\u5ea6\u5b66\u4e60\u65b9\u6cd5\u80fd\u591f\u7406\u89e3\u6587\u672c\u7684\u6df1\u5c42\u8bed\u4e49\u548c\u4e0a\u4e0b\u6587\uff0c\u63d0\u9ad8\u5173\u952e\u8bcd\u63d0\u53d6\u7684\u7cbe\u5ea6\u548c\u6548\u7387\u3002<\/li>\n<li><strong>\u5e94\u7528\u573a\u666f<\/strong>\uff1a\u9002\u7528\u4e8e\u9700\u8981\u6df1\u5c42\u6587\u672c\u7406\u89e3\u7684\u573a\u666f\uff0c\u5982\u590d\u6742\u6587\u672c\u5206\u6790\u3001\u8de8\u8bed\u8a00\u5173\u952e\u8bcd\u63d0\u53d6\u7b49\u3002\u4e0d\u8fc7\uff0c\u8fd9\u4e9b\u65b9\u6cd5\u901a\u5e38\u9700\u8981\u5927\u91cf\u7684\u8ba1\u7b97\u8d44\u6e90\u548c\u6570\u636e\u8bad\u7ec3\uff0c\u5b9e\u65bd\u6210\u672c\u8f83\u9ad8\u3002<\/li>\n<\/ul>\n<p><p>\u603b\u7ed3\u800c\u8a00\uff0cTF-IDF\u3001TextRank\u3001LDA\u53ca\u57fa\u4e8e\u6df1\u5ea6\u5b66\u4e60\u7684\u65b9\u6cd5\u662f\u76ee\u524d\u81ea\u7136\u8bed\u8a00\u5904\u7406\u9886\u57df\u4e2d\u5e38\u89c1\u4e14\u6709\u6548\u7684\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\u3002\u6bcf\u79cd\u7b97\u6cd5\u90fd\u6709\u5176\u72ec\u7279\u7684\u4f18\u70b9\u548c\u5e94\u7528\u573a\u666f\uff0c\u5f00\u53d1\u8005\u53ef\u4ee5\u6839\u636e\u5b9e\u9645\u9700\u6c42\u548c\u8d44\u6e90\u60c5\u51b5\u9009\u62e9\u6700\u5408\u9002\u7684\u65b9\u6cd5\u3002<\/p>\n<\/p>\n<h2><strong>\u76f8\u5173\u95ee\u7b54FAQs\uff1a<\/strong><\/h2>\n<p><strong>1. \u4ec0\u4e48\u662f\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e2d\u7684\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\uff1f<\/strong><\/p>\n<p>\u5173\u952e\u8bcd\u63d0\u53d6\u662f\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e2d\u7684\u4e00\u4e2a\u91cd\u8981\u4efb\u52a1\uff0c\u5176\u76ee\u6807\u662f\u4ece\u7ed9\u5b9a\u7684\u6587\u672c\u4e2d\u81ea\u52a8\u63d0\u53d6\u51fa\u6700\u80fd\u4ee3\u8868\u8be5\u6587\u672c\u4e3b\u9898\u5185\u5bb9\u7684\u5173\u952e\u8bcd\u3002\u8fd9\u4e9b\u5173\u952e\u8bcd\u901a\u5e38\u662f\u5177\u6709\u7279\u6b8a\u542b\u4e49\uff0c\u80fd\u591f\u51c6\u786e\u6982\u62ec\u6587\u672c\u4e3b\u9898\u7684\u8bcd\u8bed\u6216\u77ed\u8bed\u3002<\/p>\n<p><strong>2. \u5e38\u7528\u7684\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\u6709\u54ea\u4e9b\uff1f<\/strong><\/p>\n<p>\u5728\u81ea\u7136\u8bed\u8a00\u5904\u7406\u9886\u57df\uff0c\u6709\u8bb8\u591a\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\u53ef\u4f9b\u9009\u62e9\uff0c\u6bcf\u79cd\u7b97\u6cd5\u90fd\u6709\u5176\u72ec\u7279\u7684\u4f18\u7f3a\u70b9\u3002\u5e38\u7528\u7684\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\u5305\u62ec\uff1a<\/p>\n<ul>\n<li>\u57fa\u4e8e\u9891\u7387\u7684\u7b97\u6cd5\uff1a\u901a\u8fc7\u7edf\u8ba1\u6587\u672c\u4e2d\u8bcd\u8bed\u7684\u51fa\u73b0\u9891\u7387\uff0c\u63d0\u53d6\u51fa\u9891\u7387\u8f83\u9ad8\u7684\u8bcd\u8bed\u4f5c\u4e3a\u5173\u952e\u8bcd\u3002\u8fd9\u79cd\u65b9\u6cd5\u7b80\u5355\u76f4\u89c2\uff0c\u4f46\u53ef\u80fd\u4f1a\u5ffd\u7565\u4e00\u4e9b\u91cd\u8981\u7684\u4f46\u51fa\u73b0\u9891\u7387\u8f83\u4f4e\u7684\u8bcd\u8bed\u3002<\/li>\n<li>\u57fa\u4e8eTF-IDF\u7684\u7b97\u6cd5\uff1aTF-IDF\uff08Term Frequency-Inverse Document Frequency\uff09\u662f\u4e00\u79cd\u5e38\u7528\u7684\u8bcd\u8bed\u6743\u91cd\u8ba1\u7b97\u65b9\u6cd5\uff0c\u901a\u8fc7\u7ed3\u5408\u8bcd\u8bed\u5728\u6587\u672c\u4e2d\u7684\u9891\u7387\u548c\u5728\u6574\u4e2a\u8bed\u6599\u5e93\u4e2d\u7684\u9891\u7387\uff0c\u53cd\u6620\u8bcd\u8bed\u5bf9\u4e8e\u6587\u672c\u7684\u91cd\u8981\u6027\u3002\u57fa\u4e8eTF-IDF\u7684\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\u4f1a\u5c06TF-IDF\u503c\u8f83\u9ad8\u7684\u8bcd\u8bed\u4f5c\u4e3a\u5173\u952e\u8bcd\u3002<\/li>\n<li>\u57fa\u4e8e\u6587\u672c\u6458\u8981\u7684\u7b97\u6cd5\uff1a\u8fd9\u79cd\u7b97\u6cd5\u8bd5\u56fe\u4ece\u6587\u672c\u4e2d\u63d0\u53d6\u51fa\u6700\u80fd\u4ee3\u8868\u6587\u672c\u4e3b\u9898\u7684\u53e5\u5b50\u6216\u77ed\u8bed\u4f5c\u4e3a\u5173\u952e\u8bcd\u3002\u5e38\u7528\u7684\u65b9\u6cd5\u5305\u62ec\u6587\u672c\u6458\u8981\u7b97\u6cd5\u548c\u53e5\u5b50\u91cd\u8981\u6027\u8ba1\u7b97\u7b97\u6cd5\u3002<\/li>\n<\/ul>\n<p><strong>3. \u5982\u4f55\u9009\u62e9\u9002\u5408\u7684\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\uff1f<\/strong><\/p>\n<p>\u9009\u62e9\u9002\u5408\u7684\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\u5e94\u6839\u636e\u5177\u4f53\u7684\u60c5\u51b5\u548c\u9700\u6c42\u3002\u5982\u679c\u5bf9\u63d0\u53d6\u51fa\u7684\u5173\u952e\u8bcd\u6570\u91cf\u6ca1\u6709\u5177\u4f53\u8981\u6c42\uff0c\u4e14\u53ea\u662f\u5e0c\u671b\u5feb\u901f\u83b7\u5f97\u4e00\u4e2a\u5927\u81f4\u7684\u5173\u952e\u8bcd\u5217\u8868\uff0c\u53ef\u4ee5\u8003\u8651\u4f7f\u7528\u57fa\u4e8e\u9891\u7387\u7684\u7b97\u6cd5\u3002\u5982\u679c\u5e0c\u671b\u63d0\u53d6\u51fa\u4e0e\u6587\u672c\u4e3b\u9898\u66f4\u76f8\u5173\u7684\u8bcd\u8bed\uff0c\u53ef\u4ee5\u9009\u62e9\u57fa\u4e8eTF-IDF\u7684\u7b97\u6cd5\u3002\u5982\u679c\u5e0c\u671b\u4ece\u6587\u672c\u4e2d\u63d0\u53d6\u51fa\u6700\u80fd\u6982\u62ec\u6587\u672c\u4e3b\u9898\u7684\u53e5\u5b50\u6216\u77ed\u8bed\uff0c\u53ef\u4ee5\u8003\u8651\u4f7f\u7528\u57fa\u4e8e\u6587\u672c\u6458\u8981\u7684\u7b97\u6cd5\u3002\u6b64\u5916\uff0c\u8fd8\u53ef\u4ee5\u5c1d\u8bd5\u7ed3\u5408\u591a\u79cd\u7b97\u6cd5\uff0c\u4ee5\u83b7\u5f97\u66f4\u5168\u9762\u51c6\u786e\u7684\u5173\u952e\u8bcd\u5217\u8868\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"\u81ea\u7136\u8bed\u8a00\u5904\u7406\uff08Natural Language Processing\uff0cNLP\uff09\u9886\u57df\u4e2d\uff0c\u5173\u952e\u8bcd\u63d0\u53d6\u7b97\u6cd5\u662f\u8bc6\u522b\u548c\u63d0 [&hellip;]","protected":false},"author":3,"featured_media":202222,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[37],"tags":[],"acf":[],"_links":{"self":[{"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/posts\/202216"}],"collection":[{"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/comments?post=202216"}],"version-history":[{"count":0,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/posts\/202216\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/media\/202222"}],"wp:attachment":[{"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/media?parent=202216"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/categories?post=202216"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/tags?post=202216"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}