{"id":1166418,"date":"2025-01-15T15:31:56","date_gmt":"2025-01-15T07:31:56","guid":{"rendered":"https:\/\/docs.pingcode.com\/ask\/ask-ask\/1166418.html"},"modified":"2025-01-15T15:31:59","modified_gmt":"2025-01-15T07:31:59","slug":"%e6%95%b0%e6%8d%ae%e6%b8%85%e6%b4%97python%e5%a6%82%e4%bd%95%e5%8e%bb%e9%87%8d","status":"publish","type":"post","link":"https:\/\/docs.pingcode.com\/ask\/1166418.html","title":{"rendered":"\u6570\u636e\u6e05\u6d17python\u5982\u4f55\u53bb\u91cd"},"content":{"rendered":"<p style=\"text-align:center;\" ><img decoding=\"async\" src=\"https:\/\/cdn-kb.worktile.com\/kb\/wp-content\/uploads\/2024\/04\/25210538\/d86a683a-fd88-4cba-8312-dd58793768d4.webp\" alt=\"\u6570\u636e\u6e05\u6d17python\u5982\u4f55\u53bb\u91cd\" \/><\/p>\n<p><p> \u5728\u6570\u636e\u6e05\u6d17\u8fc7\u7a0b\u4e2d\uff0c\u6570\u636e\u53bb\u91cd\u662f\u4e00\u4e2a\u975e\u5e38\u91cd\u8981\u7684\u6b65\u9aa4\u3002<strong>Python \u63d0\u4f9b\u4e86\u591a\u79cd\u65b9\u6cd5\u6765\u5b9e\u73b0\u6570\u636e\u53bb\u91cd\uff0c\u5305\u62ec\u4f7f\u7528 Pandas \u5e93\u3001set \u6570\u636e\u7ed3\u6784\u3001\u5b57\u5178\u7b49<\/strong>\u3002\u5176\u4e2d Pandas \u5e93\u662f\u6700\u5e38\u7528\u7684\u5de5\u5177\uff0c\u56e0\u4e3a\u5b83\u4e3a\u6570\u636e\u5904\u7406\u63d0\u4f9b\u4e86\u4e30\u5bcc\u4e14\u7b80\u4fbf\u7684\u51fd\u6570\u3002<strong>\u4f7f\u7528 Pandas \u7684 <code>drop_duplicates<\/code> \u65b9\u6cd5\u53ef\u4ee5\u975e\u5e38\u65b9\u4fbf\u5730\u8fdb\u884c\u6570\u636e\u53bb\u91cd<\/strong>\u3002\u4e0b\u9762\u5c06\u8be6\u7ec6\u4ecb\u7ecd\u5982\u4f55\u4f7f\u7528 Pandas \u8fdb\u884c\u6570\u636e\u53bb\u91cd\u3002<\/p>\n<\/p>\n<p><h3>\u4e00\u3001Pandas\u5e93\u7684\u5e94\u7528<\/h3>\n<\/p>\n<p><h4>1\u3001\u5b89\u88c5\u548c\u5bfc\u5165Pandas<\/h4>\n<\/p>\n<p><p>\u9996\u5148\uff0c\u786e\u4fdd\u4f60\u5df2\u7ecf\u5b89\u88c5\u4e86 Pandas \u5e93\u3002\u5982\u679c\u6ca1\u6709\u5b89\u88c5\uff0c\u53ef\u4ee5\u4f7f\u7528\u4ee5\u4e0b\u547d\u4ee4\u8fdb\u884c\u5b89\u88c5\uff1a<\/p>\n<\/p>\n<p><pre><code 
class=\"language-python\">pip install pandas<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><p>\u7136\u540e\u5728\u4ee3\u7801\u4e2d\u5bfc\u5165 Pandas\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">import pandas as pd<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>2\u3001\u8bfb\u53d6\u6570\u636e<\/h4>\n<\/p>\n<p><p>\u5047\u8bbe\u6211\u4eec\u6709\u4e00\u4e2a\u5305\u542b\u91cd\u590d\u6570\u636e\u7684 CSV \u6587\u4ef6\uff0c\u53ef\u4ee5\u4f7f\u7528 Pandas \u8bfb\u53d6\u6570\u636e\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">df = pd.read_csv(&#39;your_file.csv&#39;)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>3\u3001\u4f7f\u7528drop_duplicates\u65b9\u6cd5\u53bb\u91cd<\/h4>\n<\/p>\n<p><p>Pandas \u63d0\u4f9b\u4e86 <code>drop_duplicates<\/code> \u65b9\u6cd5\u6765\u5220\u9664\u91cd\u590d\u884c\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">df_cleaned = df.drop_duplicates()<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><p>\u8fd9\u4e2a\u65b9\u6cd5\u4f1a\u9ed8\u8ba4\u5220\u9664\u6240\u6709\u5217\u5b8c\u5168\u76f8\u540c\u7684\u91cd\u590d\u884c\u3002\u5982\u679c\u4f60\u53ea\u60f3\u57fa\u4e8e\u67d0\u4e9b\u5217\u6765\u53bb\u91cd\uff0c\u53ef\u4ee5\u901a\u8fc7 <code>subset<\/code> \u53c2\u6570\u6307\u5b9a\u5217\u540d\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">df_cleaned = df.drop_duplicates(subset=[&#39;column1&#39;, &#39;column2&#39;])<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><p><code>drop_duplicates<\/code> \u65b9\u6cd5\u8fd8\u6709\u4e00\u4e9b\u5176\u4ed6\u53c2\u6570\uff0c\u4f8b\u5982 <code>keep<\/code>\uff0c\u5b83\u53ef\u4ee5\u6307\u5b9a\u4fdd\u7559\u91cd\u590d\u9879\u7684\u54ea\u4e00\u884c\uff08\u7b2c\u4e00\u884c\u6216\u6700\u540e\u4e00\u884c\uff09\uff0c\u9ed8\u8ba4\u503c\u662f <code>&#39;first&#39;<\/code>\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">df_cleaned = df.drop_duplicates(subset=[&#39;column1&#39;, &#39;column2&#39;], keep=&#39;first&#39;)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><p>\u4f60\u4e5f\u53ef\u4ee5\u901a\u8fc7 <code>inplace<\/code> 
\u53c2\u6570\u76f4\u63a5\u4fee\u6539\u539f\u6570\u636e\u6846\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">df.drop_duplicates(subset=[&#39;column1&#39;, &#39;column2&#39;], keep=&#39;first&#39;, inplace=True)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h3>\u4e8c\u3001\u4f7f\u7528set\u6570\u636e\u7ed3\u6784\u53bb\u91cd<\/h3>\n<\/p>\n<p><h4>1\u3001\u5217\u8868\u53bb\u91cd<\/h4>\n<\/p>\n<p><p>\u5982\u679c\u4f60\u7684\u6570\u636e\u662f\u4e00\u4e2a\u5217\u8868\uff0c\u53ef\u4ee5\u4f7f\u7528 <code>set<\/code> \u6765\u53bb\u91cd\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">data = [1, 2, 2, 3, 4, 4, 5]<\/p>\n<p>data_unique = list(set(data))<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>2\u3001\u5b57\u5178\u53bb\u91cd<\/h4>\n<\/p>\n<p><p>\u5bf9\u4e8e\u5305\u542b\u91cd\u590d\u503c\u7684\u5b57\u5178\uff0c\u53ef\u4ee5\u901a\u8fc7\u6784\u5efa\u4e00\u4e2a\u65b0\u7684\u5b57\u5178\u6765\u53bb\u91cd\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">data_dict = {&#39;a&#39;: 1, &#39;b&#39;: 2, &#39;c&#39;: 2, &#39;d&#39;: 3}<\/p>\n<p>data_dict_unique = {}<\/p>\n<p>for k, v in data_dict.items():<\/p>\n<p>    if v not in data_dict_unique.values():<\/p>\n<p>        data_dict_unique[k] = v<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h3>\u4e09\u3001\u4f7f\u7528Numpy\u53bb\u91cd<\/h3>\n<\/p>\n<p><p>Numpy \u662f\u53e6\u4e00\u4e2a\u5e38\u7528\u7684\u79d1\u5b66\u8ba1\u7b97\u5e93\uff0c\u4e5f\u63d0\u4f9b\u4e86\u53bb\u91cd\u529f\u80fd\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">import numpy as np<\/p>\n<p>data = np.array([1, 2, 2, 3, 4, 4, 5])<\/p>\n<p>data_unique = np.unique(data)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h3>\u56db\u3001\u7efc\u5408\u5b9e\u4f8b<\/h3>\n<\/p>\n<p><p>\u4ee5\u4e0b\u662f\u4e00\u4e2a\u7efc\u5408\u5b9e\u4f8b\uff0c\u6f14\u793a\u5982\u4f55\u4f7f\u7528 Pandas \u548c Numpy \u53bb\u91cd\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">import pandas as pd<\/p>\n<p>import numpy as 
np<\/p>\n<p># \u521b\u5efa\u4e00\u4e2a\u5305\u542b\u91cd\u590d\u6570\u636e\u7684\u6570\u636e\u6846<\/p>\n<p>data = {&#39;Name&#39;: [&#39;Tom&#39;, &#39;Nick&#39;, &#39;Nick&#39;, &#39;Tom&#39;, &#39;Jack&#39;],<\/p>\n<p>        &#39;Age&#39;: [20, 21, 21, 20, 22]}<\/p>\n<p>df = pd.DataFrame(data)<\/p>\n<p># \u4f7f\u7528Pandas\u53bb\u91cd<\/p>\n<p>df_cleaned = df.drop_duplicates()<\/p>\n<p># \u663e\u793a\u53bb\u91cd\u540e\u7684\u6570\u636e\u6846<\/p>\n<p>print(&quot;Pandas \u53bb\u91cd\u7ed3\u679c\uff1a&quot;)<\/p>\n<p>print(df_cleaned)<\/p>\n<p># \u4f7f\u7528Numpy\u53bb\u91cd<\/p>\n<p>data_np = np.array([1, 2, 2, 3, 4, 4, 5])<\/p>\n<p>data_unique = np.unique(data_np)<\/p>\n<p>print(&quot;Numpy \u53bb\u91cd\u7ed3\u679c\uff1a&quot;)<\/p>\n<p>print(data_unique)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h3>\u4e94\u3001\u603b\u7ed3<\/h3>\n<\/p>\n<p><p>\u6570\u636e\u53bb\u91cd\u662f\u6570\u636e\u6e05\u6d17\u4e2d\u7684\u4e00\u4e2a\u5173\u952e\u6b65\u9aa4\u3002<strong>\u4f7f\u7528 Pandas \u7684 <code>drop_duplicates<\/code> \u65b9\u6cd5\u53ef\u4ee5\u65b9\u4fbf\u5730\u53bb\u9664\u6570\u636e\u6846\u4e2d\u7684\u91cd\u590d\u884c<\/strong>\uff0c\u5e76\u4e14\u53ef\u4ee5\u6307\u5b9a\u57fa\u4e8e\u54ea\u4e9b\u5217\u8fdb\u884c\u53bb\u91cd\u3002<strong>\u5bf9\u4e8e\u5217\u8868\u548c\u6570\u7ec4\u6570\u636e\uff0c\u53ef\u4ee5\u4f7f\u7528 Python \u5185\u7f6e\u7684 <code>set<\/code> \u6570\u636e\u7ed3\u6784\u6216 Numpy 
\u5e93\u6765\u53bb\u91cd<\/strong>\u3002\u8fd9\u4e9b\u65b9\u6cd5\u5404\u6709\u4f18\u7f3a\u70b9\uff0c\u9009\u62e9\u5408\u9002\u7684\u65b9\u6cd5\u53ef\u4ee5\u6839\u636e\u6570\u636e\u7684\u7c7b\u578b\u548c\u5177\u4f53\u9700\u6c42\u6765\u51b3\u5b9a\u3002<\/p>\n<\/p>\n<p><h3>\u516d\u3001\u53bb\u91cd\u7684\u9ad8\u7ea7\u5e94\u7528<\/h3>\n<\/p>\n<p><h4>1\u3001\u53bb\u91cd\u540e\u4fdd\u6301\u987a\u5e8f<\/h4>\n<\/p>\n<p><p>\u5728\u67d0\u4e9b\u60c5\u51b5\u4e0b\uff0c\u53bb\u91cd\u540e\u9700\u8981\u4fdd\u6301\u539f\u59cb\u6570\u636e\u7684\u987a\u5e8f\u3002\u53ef\u4ee5\u4f7f\u7528 <code>OrderedDict<\/code> \u6765\u5b9e\u73b0\u8fd9\u4e00\u70b9\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">from collections import OrderedDict<\/p>\n<p>data = [&#39;a&#39;, &#39;b&#39;, &#39;a&#39;, &#39;c&#39;, &#39;b&#39;]<\/p>\n<p>data_unique = list(OrderedDict.fromkeys(data))<\/p>\n<p>print(data_unique)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>2\u3001\u591a\u6761\u4ef6\u53bb\u91cd<\/h4>\n<\/p>\n<p><p>\u6709\u65f6\u9700\u8981\u57fa\u4e8e\u591a\u4e2a\u6761\u4ef6\u8fdb\u884c\u53bb\u91cd\uff0c\u4f8b\u5982\u5728\u6570\u636e\u6846\u4e2d\u57fa\u4e8e\u67d0\u4e9b\u5217\u7684\u7ec4\u5408\u6765\u53bb\u91cd\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">df_cleaned = df.drop_duplicates(subset=[&#39;column1&#39;, &#39;column2&#39;])<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>3\u3001\u5bf9\u6bd4\u53bb\u91cd\u524d\u540e\u7684\u6570\u636e<\/h4>\n<\/p>\n<p><p>\u5728\u6570\u636e\u53bb\u91cd\u540e\uff0c\u901a\u5e38\u9700\u8981\u5bf9\u6bd4\u53bb\u91cd\u524d\u540e\u7684\u6570\u636e\u91cf\uff0c\u4ee5\u786e\u4fdd\u53bb\u91cd\u64cd\u4f5c\u7684\u6548\u679c\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">original_length = len(df)<\/p>\n<p>cleaned_length = len(df_cleaned)<\/p>\n<p>print(f&quot;\u53bb\u91cd\u524d\u884c\u6570: {original_length}, \u53bb\u91cd\u540e\u884c\u6570: 
{cleaned_length}&quot;)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h3>\u4e03\u3001\u53bb\u91cd\u7684\u6ce8\u610f\u4e8b\u9879<\/h3>\n<\/p>\n<p><h4>1\u3001\u6570\u636e\u7c7b\u578b<\/h4>\n<\/p>\n<p><p>\u4e0d\u540c\u7684\u6570\u636e\u7c7b\u578b\u5728\u53bb\u91cd\u65f6\u53ef\u80fd\u4f1a\u6709\u4e0d\u540c\u7684\u8868\u73b0\u3002\u4f8b\u5982\uff0c\u6d6e\u70b9\u6570\u7684\u7cbe\u5ea6\u95ee\u9898\u53ef\u80fd\u5bfc\u81f4\u610f\u5916\u7684\u91cd\u590d\u6570\u636e\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">data = [1.0, 1.0, 2.0, 2.0000001]<\/p>\n<p>data_unique = list(set(data))<\/p>\n<p>print(data_unique)  # \u53ef\u80fd\u4f1a\u5f97\u5230 [1.0, 2.0, 2.0000001]<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>2\u3001\u7a7a\u503c\u5904\u7406<\/h4>\n<\/p>\n<p><p>\u5728\u53bb\u91cd\u8fc7\u7a0b\u4e2d\uff0c\u7a7a\u503c\u7684\u5904\u7406\u4e5f\u662f\u4e00\u4e2a\u9700\u8981\u8003\u8651\u7684\u95ee\u9898\u3002Pandas \u5728\u53bb\u91cd\u65f6\u4f1a\u9ed8\u8ba4\u4fdd\u7559\u7a7a\u503c\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">data = {&#39;Name&#39;: [&#39;Tom&#39;, &#39;Nick&#39;, &#39;Nick&#39;, &#39;Tom&#39;, None],<\/p>\n<p>        &#39;Age&#39;: [20, 21, 21, 20, 22]}<\/p>\n<p>df = pd.DataFrame(data)<\/p>\n<p>df_cleaned = df.drop_duplicates()<\/p>\n<p>print(df_cleaned)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>3\u3001\u6027\u80fd\u4f18\u5316<\/h4>\n<\/p>\n<p><p>\u5bf9\u4e8e\u5927\u6570\u636e\u96c6\uff0c\u53bb\u91cd\u64cd\u4f5c\u53ef\u80fd\u4f1a\u6bd4\u8f83\u8017\u65f6\u3002\u53ef\u4ee5\u901a\u8fc7\u4f18\u5316\u4ee3\u7801\u6216\u4f7f\u7528\u66f4\u9ad8\u6548\u7684\u7b97\u6cd5\u6765\u63d0\u9ad8\u6027\u80fd\u3002\u4f8b\u5982\uff0c\u4f7f\u7528\u54c8\u5e0c\u8868\u6765\u52a0\u901f\u67e5\u627e\u64cd\u4f5c\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">def remove_duplicates(data):<\/p>\n<p>    seen = set()<\/p>\n<p>    result = []<\/p>\n<p>    for item in data:<\/p>\n<p>        if item not in seen:<\/p>\n<p>            seen.add(item)<\/p>\n<p>            
result.append(item)<\/p>\n<p>    return result<\/p>\n<p>data = [1, 2, 2, 3, 4, 4, 5]<\/p>\n<p>data_unique = remove_duplicates(data)<\/p>\n<p>print(data_unique)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h3>\u516b\u3001\u603b\u7ed3\u4e0e\u5c55\u671b<\/h3>\n<\/p>\n<p><p>\u6570\u636e\u53bb\u91cd\u662f\u6570\u636e\u6e05\u6d17\u4e2d\u7684\u57fa\u672c\u64cd\u4f5c\uff0c\u4f46\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\u53ef\u80fd\u4f1a\u9047\u5230\u5404\u79cd\u590d\u6742\u60c5\u51b5\u3002<strong>\u4f7f\u7528 Pandas \u7684 <code>drop_duplicates<\/code> \u65b9\u6cd5\u662f\u6700\u5e38\u7528\u4e14\u9ad8\u6548\u7684\u53bb\u91cd\u65b9\u5f0f<\/strong>\uff0c\u4f46\u5728\u5904\u7406\u4e0d\u540c\u7c7b\u578b\u7684\u6570\u636e\u65f6\uff0c\u8fd8\u9700\u8981\u7ed3\u5408\u5176\u4ed6\u65b9\u6cd5\u548c\u6280\u5de7\u3002\u901a\u8fc7\u4e0d\u65ad\u5b9e\u8df5\u548c\u4f18\u5316\uff0c\u53ef\u4ee5\u63d0\u9ad8\u6570\u636e\u53bb\u91cd\u7684\u51c6\u786e\u6027\u548c\u6548\u7387\uff0c\u4e3a\u540e\u7eed\u7684\u6570\u636e\u5206\u6790\u548c\u5efa\u6a21\u6253\u4e0b\u575a\u5b9e\u7684\u57fa\u7840\u3002<\/p>\n<\/p>\n<p><h3>\u4e5d\u3001\u53bb\u91cd\u7684\u5b9e\u6218\u6848\u4f8b<\/h3>\n<\/p>\n<p><h4>1\u3001\u6848\u4f8b\u80cc\u666f<\/h4>\n<\/p>\n<p><p>\u5047\u8bbe\u6211\u4eec\u6709\u4e00\u4e2a\u5305\u542b\u5ba2\u6237\u4fe1\u606f\u7684\u6570\u636e\u96c6\uff0c\u5176\u4e2d\u5305\u542b\u5ba2\u6237\u7684\u59d3\u540d\u3001\u7535\u8bdd\u548c\u7535\u5b50\u90ae\u4ef6\u7b49\u4fe1\u606f\u3002\u7531\u4e8e\u6570\u636e\u5f55\u5165\u7684\u539f\u56e0\uff0c\u53ef\u80fd\u5b58\u5728\u91cd\u590d\u7684\u5ba2\u6237\u8bb0\u5f55\u3002\u6211\u4eec\u9700\u8981\u5bf9\u6570\u636e\u8fdb\u884c\u6e05\u6d17\uff0c\u53bb\u9664\u91cd\u590d\u7684\u5ba2\u6237\u4fe1\u606f\u3002<\/p>\n<\/p>\n<p><h4>2\u3001\u8bfb\u53d6\u6570\u636e<\/h4>\n<\/p>\n<p><p>\u9996\u5148\uff0c\u8bfb\u53d6\u6570\u636e\u5e76\u8fdb\u884c\u521d\u6b65\u67e5\u770b\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">import pandas as 
pd<\/p>\n<p># \u5047\u8bbe\u6570\u636e\u5b58\u50a8\u5728\u4e00\u4e2a CSV \u6587\u4ef6\u4e2d<\/p>\n<p>df = pd.read_csv(&#39;customers.csv&#39;)<\/p>\n<p># \u67e5\u770b\u6570\u636e\u7684\u524d\u51e0\u884c<\/p>\n<p>print(df.head())<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>3\u3001\u5206\u6790\u91cd\u590d\u60c5\u51b5<\/h4>\n<\/p>\n<p><p>\u901a\u8fc7\u67e5\u770b\u6570\u636e\u7684\u57fa\u672c\u4fe1\u606f\u548c\u63cf\u8ff0\u6027\u7edf\u8ba1\uff0c\u4e86\u89e3\u6570\u636e\u4e2d\u91cd\u590d\u7684\u60c5\u51b5\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\"># \u67e5\u770b\u6570\u636e\u57fa\u672c\u4fe1\u606f<\/p>\n<p>print(df.info())<\/p>\n<p># \u67e5\u770b\u91cd\u590d\u7684\u884c\u6570<\/p>\n<p>print(&quot;\u91cd\u590d\u884c\u6570: &quot;, df.duplicated().sum())<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>4\u3001\u53bb\u91cd\u64cd\u4f5c<\/h4>\n<\/p>\n<p><p>\u4f7f\u7528 <code>drop_duplicates<\/code> \u65b9\u6cd5\u53bb\u9664\u91cd\u590d\u5ba2\u6237\u8bb0\u5f55\uff0c\u53ef\u4ee5\u57fa\u4e8e\u59d3\u540d\u3001\u7535\u8bdd\u548c\u7535\u5b50\u90ae\u4ef6\u8fd9\u4e09\u4e2a\u5b57\u6bb5\u6765\u5224\u65ad\u91cd\u590d\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">df_cleaned = df.drop_duplicates(subset=[&#39;Name&#39;, &#39;Phone&#39;, &#39;Email&#39;])<\/p>\n<p># \u67e5\u770b\u53bb\u91cd\u540e\u7684\u6570\u636e<\/p>\n<p>print(df_cleaned.info())<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>5\u3001\u9a8c\u8bc1\u7ed3\u679c<\/h4>\n<\/p>\n<p><p>\u9a8c\u8bc1\u53bb\u91cd\u540e\u7684\u6570\u636e\uff0c\u786e\u4fdd\u53bb\u91cd\u64cd\u4f5c\u7684\u6b63\u786e\u6027\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\"># \u9a8c\u8bc1\u53bb\u91cd\u540e\u7684\u884c\u6570<\/p>\n<p>print(&quot;\u53bb\u91cd\u540e\u7684\u884c\u6570: &quot;, 
len(df_cleaned))<\/p>\n<p># \u968f\u673a\u62bd\u53d6\u51e0\u884c\u6570\u636e\u8fdb\u884c\u4eba\u5de5\u9a8c\u8bc1<\/p>\n<p>print(df_cleaned.sample(5))<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h4>6\u3001\u4fdd\u5b58\u6e05\u6d17\u540e\u7684\u6570\u636e<\/h4>\n<\/p>\n<p><p>\u5c06\u6e05\u6d17\u540e\u7684\u6570\u636e\u4fdd\u5b58\u5230\u65b0\u7684\u6587\u4ef6\u4e2d\uff0c\u4ee5\u4fbf\u540e\u7eed\u4f7f\u7528\uff1a<\/p>\n<\/p>\n<p><pre><code class=\"language-python\">df_cleaned.to_csv(&#39;customers_cleaned.csv&#39;, index=False)<\/p>\n<p><\/code><\/pre>\n<\/p>\n<p><h3>\u5341\u3001\u603b\u7ed3\u4e0e\u5c55\u671b<\/h3>\n<\/p>\n<p><p>\u901a\u8fc7\u672c\u6587\u7684\u4ecb\u7ecd\uff0c\u6211\u4eec\u4e86\u89e3\u4e86\u6570\u636e\u53bb\u91cd\u7684\u591a\u79cd\u65b9\u6cd5\u548c\u6280\u5de7\u3002<strong>\u4f7f\u7528 Pandas \u7684 <code>drop_duplicates<\/code> \u65b9\u6cd5\u53ef\u4ee5\u9ad8\u6548\u5730\u53bb\u9664\u6570\u636e\u6846\u4e2d\u7684\u91cd\u590d\u884c<\/strong>\uff0c\u540c\u65f6\u7ed3\u5408\u5176\u4ed6\u65b9\u6cd5\u548c\u6280\u5de7\uff0c\u53ef\u4ee5\u5904\u7406\u4e0d\u540c\u7c7b\u578b\u7684\u6570\u636e\u548c\u590d\u6742\u7684\u53bb\u91cd\u9700\u6c42\u3002\u6570\u636e\u53bb\u91cd\u662f\u6570\u636e\u6e05\u6d17\u7684\u91cd\u8981\u6b65\u9aa4\uff0c\u638c\u63e1\u8fd9\u4e00\u6280\u80fd\u53ef\u4ee5\u5927\u5927\u63d0\u9ad8\u6570\u636e\u5904\u7406\u7684\u8d28\u91cf\u548c\u6548\u7387\u3002\u672a\u6765\uff0c\u6211\u4eec\u53ef\u4ee5\u8fdb\u4e00\u6b65\u7814\u7a76\u548c\u63a2\u7d22\u66f4\u9ad8\u6548\u7684\u53bb\u91cd\u7b97\u6cd5\u548c\u5de5\u5177\uff0c\u4ee5\u5e94\u5bf9\u65e5\u76ca\u590d\u6742\u548c\u5e9e\u5927\u7684\u6570\u636e\u96c6\u3002<\/p>\n<\/p>\n<p><p>\u901a\u8fc7\u4e0d\u65ad\u5b9e\u8df5\u548c\u4f18\u5316\uff0c\u53ef\u4ee5\u63d0\u9ad8\u6570\u636e\u53bb\u91cd\u7684\u51c6\u786e\u6027\u548c\u6548\u7387\uff0c\u4e3a\u540e\u7eed\u7684\u6570\u636e\u5206\u6790\u548c\u5efa\u6a21\u6253\u4e0b\u575a\u5b9e\u7684\u57fa\u7840\u3002\u5e0c\u671b\u672c\u6587\u80fd\u4e3a\u5927\u5bb6\u63d0\u
4f9b\u4e00\u4e9b\u6709\u4ef7\u503c\u7684\u53c2\u8003\u548c\u5e2e\u52a9\u3002<\/p>\n<\/p>\n<h2><strong>\u76f8\u5173\u95ee\u7b54FAQs\uff1a<\/strong><\/h2>\n<p> <strong>\u5982\u4f55\u4f7f\u7528Python\u8fdb\u884c\u6570\u636e\u53bb\u91cd\uff1f<\/strong><\/p>\n<p>\u5728Python\u4e2d\uff0c\u4f7f\u7528Pandas\u5e93\u8fdb\u884c\u6570\u636e\u53bb\u91cd\u662f\u975e\u5e38\u7b80\u5355\u7684\u3002\u9996\u5148\uff0c\u60a8\u9700\u8981\u5bfc\u5165Pandas\u5e93\u5e76\u52a0\u8f7d\u6570\u636e\u96c6\u3002\u901a\u8fc7\u8c03\u7528<code>drop_duplicates()<\/code>\u65b9\u6cd5\uff0c\u53ef\u4ee5\u8f7b\u677e\u53bb\u9664\u91cd\u590d\u7684\u884c\u3002\u4f8b\u5982\uff1a<code>df.drop_duplicates(inplace=True)<\/code>\u3002\u8be5\u65b9\u6cd5\u8fd8\u5141\u8bb8\u60a8\u6307\u5b9a\u7279\u5b9a\u7684\u5217\u4ee5\u4fbf\u4ec5\u5728\u8fd9\u4e9b\u5217\u4e2d\u67e5\u627e\u91cd\u590d\u9879\u3002<\/p>\n<p><strong>\u53bb\u91cd\u8fc7\u7a0b\u4e2d\u6709\u54ea\u4e9b\u5e38\u89c1\u7684\u9519\u8bef\u9700\u8981\u6ce8\u610f\uff1f<\/strong><\/p>\n<p>\u5728\u6570\u636e\u53bb\u91cd\u65f6\uff0c\u4e00\u4e9b\u5e38\u89c1\u7684\u9519\u8bef\u5305\u62ec\uff1a\u672a\u8003\u8651\u5927\u5c0f\u5199\u5dee\u5f02\u5bfc\u81f4\u7684\u91cd\u590d\u3001\u5ffd\u89c6\u7a7a\u503c\uff08NaN\uff09\u5904\u7406\u7b49\u3002\u4f7f\u7528<code>drop_duplicates()<\/code>\u65f6\uff0c\u53ef\u4ee5\u901a\u8fc7\u8bbe\u7f6e<code>ignore_index=True<\/code>\u53c2\u6570\u6765\u91cd\u65b0\u7d22\u5f15\u7ed3\u679c\uff0c\u8fd9\u6837\u53ef\u4ee5\u907f\u514d\u56e0\u4e3a\u7d22\u5f15\u95ee\u9898\u5bfc\u81f4\u7684\u6df7\u6dc6\u3002\u6b64\u5916\uff0c\u4f7f\u7528<code>subset<\/code>\u53c2\u6570\u53ef\u4ee5\u6307\u5b9a\u9700\u8981\u68c0\u67e5\u91cd\u590d\u7684\u5217\uff0c\u4ece\u800c\u63d0\u9ad8\u53bb\u91cd\u7684\u7cbe\u786e\u5ea6\u3002<\/p>\n<p><strong>\u5982\u4f55\u9a8c\u8bc1\u6570\u636e\u53bb\u91cd\u7684\u6548\u679c\uff1f<\/strong><\/p>\n<p>\u5728\u6267\u884c\u6570\u636e\u53bb\u91cd\u540e\uff0c\u9a8c\u8bc1\u53bb\u91cd\u6548\u679c\u975e\u5e38\u91cd\u8981\u3002\u60a8\u53ef\u4ee5\u90
1a\u8fc7\u6bd4\u8f83\u53bb\u91cd\u524d\u540e\u7684\u6570\u636e\u957f\u5ea6\u6765\u5224\u65ad\u3002\u4f8b\u5982\uff0c\u4f7f\u7528<code>len(df)<\/code>\u67e5\u770b\u539f\u59cb\u6570\u636e\u7684\u884c\u6570\uff0c\u53bb\u91cd\u540e\u518d\u6b21\u67e5\u770b\u884c\u6570\uff0c\u786e\u4fdd\u91cd\u590d\u884c\u5df2\u88ab\u6210\u529f\u5220\u9664\u3002\u6b64\u5916\uff0c\u901a\u8fc7\u4f7f\u7528<code>value_counts()<\/code>\u65b9\u6cd5\uff0c\u60a8\u53ef\u4ee5\u5206\u6790\u7279\u5b9a\u5217\u7684\u6570\u636e\u5206\u5e03\uff0c\u4ece\u800c\u66f4\u76f4\u89c2\u5730\u4e86\u89e3\u53bb\u91cd\u6548\u679c\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"\u5728\u6570\u636e\u6e05\u6d17\u8fc7\u7a0b\u4e2d\uff0c\u6570\u636e\u53bb\u91cd\u662f\u4e00\u4e2a\u975e\u5e38\u91cd\u8981\u7684\u6b65\u9aa4\u3002Python \u63d0\u4f9b\u4e86\u591a\u79cd\u65b9\u6cd5\u6765\u5b9e\u73b0\u6570\u636e\u53bb\u91cd\uff0c\u5305\u62ec\u4f7f\u7528 Pand [&hellip;]","protected":false},"author":3,"featured_media":1166422,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[37],"tags":[],"acf":[],"_links":{"self":[{"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/posts\/1166418"}],"collection":[{"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/comments?post=1166418"}],"version-history":[{"count":"1","href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/posts\/1166418\/revisions"}],"predecessor-version":[{"id":1166423,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/posts\/1166418\/revisions\/1166423"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/media\/1166422"}],"wp:attachment":[{"href":"https:\/\/doc
s.pingcode.com\/wp-json\/wp\/v2\/media?parent=1166418"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/categories?post=1166418"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/docs.pingcode.com\/wp-json\/wp\/v2\/tags?post=1166418"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}