1. Barnaby Walters

    Here’s a Python snippet that analyses an iNaturalist export file and outputs an HTML-formatted list of the species that have observations from only a single person (e.g. this list for the CNC Wien 2021)

    # coding: utf-8
    
    import argparse
    import collections
    import html

    import pandas as pd
    
    """
    Find which species in an iNaturalist export only have observations from a single observer.
    
    Get an export from here: https://www.inaturalist.org/observations/export with a query such
    as quality_grade=research&identifications=any&rank=species&projects[]=92926 and at least the
    following columns: taxon_id, scientific_name, common_name, user_login
    
    Download it, extract the CSV, then run this script with the file name as its argument. It will
    output basic stats formatted as HTML.
    
    The only external module required is pandas.
    
    Example usage:
    
            py uniquely_observed_species.py wien_cnc_2021.csv > wien_cnc_2021_results.html
    
    By Barnaby Walters waterpigs.co.uk
    """
    
    # Columns the export must contain for the report to be built.
    REQUIRED_COLUMNS = ('taxon_id', 'scientific_name', 'common_name', 'user_login')


    def find_uniquely_observed_species(df):
        """Group the taxa that were seen by exactly one observer under that observer.

        Args:
            df: Observation export with at least ``taxon_id`` and ``user_login`` columns.

        Returns:
            A list of ``(user_login, [taxon_id, ...])`` pairs. Observers with the most
            uniquely observed taxa come first; ties keep the order in which the observers'
            first unique taxon appears in the export. Taxa are listed in export order.
        """
        # One row per distinct (taxon, observer) pair, in order of first appearance.
        # Rows without a taxon_id can never be attributed to a taxon, so drop them.
        pairs = (
            df.loc[:, ['taxon_id', 'user_login']]
            .dropna(subset=['taxon_id'])
            .drop_duplicates()
        )
        # A taxon that still occurs more than once has several observers; keep=False
        # discards every such row, leaving only single-observer taxa.
        unique_pairs = pairs.drop_duplicates(subset='taxon_id', keep=False)

        taxa_by_observer = collections.defaultdict(list)
        for tid, observer in zip(unique_pairs['taxon_id'], unique_pairs['user_login']):
            taxa_by_observer[observer].append(tid)

        # sorted() is stable, so observers with equal counts keep insertion order.
        return sorted(taxa_by_observer.items(), key=lambda item: len(item[1]), reverse=True)


    def build_species_reference(df):
        """Return a name lookup table with exactly one row per ``taxon_id``.

        Args:
            df: Observation export with ``taxon_id``, ``scientific_name`` and
                ``common_name`` columns.

        Returns:
            A DataFrame indexed by ``taxon_id`` with ``scientific_name`` and
            ``common_name`` columns. When a taxon appears with differing name fields,
            the first non-null value of each column is used, so the index is always
            unique.
        """
        names = df.groupby('taxon_id', sort=False)[['scientific_name', 'common_name']]
        return names.first()


    def render_report(sorted_observations, species):
        """Render the HTML report for the uniquely observed taxa.

        Args:
            sorted_observations: ``(user_login, [taxon_id, ...])`` pairs as returned by
                ``find_uniquely_observed_species``.
            species: Lookup table as returned by ``build_species_reference``.

        Returns:
            The report as a single string (without a trailing newline).
        """
        total_taxa = sum(len(taxa) for _, taxa in sorted_observations)
        mentions = ''.join(f"@{html.escape(str(observer))} " for observer, _ in sorted_observations)
        lines = [
            f"<p>{total_taxa} taxa uniquely observed by {len(sorted_observations)} observers.</p>",
            '<p>',
            f"{mentions}</p>",
        ]

        for observer, taxa in sorted_observations:
            # Values come straight from the CSV, so escape them before embedding in HTML.
            login = html.escape(str(observer))
            lines.append(f"""\n\n<p><a href="https://www.inaturalist.org/people/{login}">@{login}</a> ({len(taxa)} taxa):</p><ul>""")
            for tid in taxa:
                scientific_name = html.escape(str(species.at[tid, 'scientific_name']), quote=False)
                common_name = species.at[tid, 'common_name']
                label = f"<i>{scientific_name}</i>"
                if not pd.isnull(common_name):
                    label += f" ({html.escape(str(common_name), quote=False)})"
                lines.append(f"""<li><a href="https://www.inaturalist.org/taxa/{tid}">{label}</a></li>""")
            lines.append("</ul>")

        return "\n".join(lines)


    def main(argv=None):
        """Parse the command line, read the export and print the HTML report to stdout.

        Args:
            argv: Argument list to parse; defaults to ``sys.argv[1:]`` when ``None``.
        """
        parser = argparse.ArgumentParser(description='Given an iNaturalist observation export, find species which were only observed by a single person.')
        parser.add_argument('export_file')
        args = parser.parse_args(argv)

        df = pd.read_csv(args.export_file)

        # Fail with a readable message instead of a KeyError traceback.
        missing = [column for column in REQUIRED_COLUMNS if column not in df.columns]
        if missing:
            parser.error(f"{args.export_file} is missing required column(s): {', '.join(missing)}")

        print(render_report(find_uniquely_observed_species(df), build_species_reference(df)))


    if __name__ == "__main__":
        main()