Here’s a Python snippet for analysing an iNaturalist export file and exporting an HTML-formatted list of species which only have observations from a single person (e.g. this list for the CNC Wien 2021).
# coding: utf-8
import argparse
import collections
import html

import pandas as pd
"""
Find which species in an iNaturalist export only have observations from a single observer.
Get an export from here: https://www.inaturalist.org/observations/export with a query such
as quality_grade=research&identifications=any&rank=species&projects[]=92926 and at least the
following columns: taxon_id, scientific_name, common_name, user_login
Download it, extract the CSV, then run this script with the file name as its argument. It will
output basic stats formatted as HTML.
The only external module required is pandas.
Example usage:
py uniquely_observed_species.py wien_cnc_2021.csv > wien_cnc_2021_results.html
By Barnaby Walters waterpigs.co.uk
"""
# Columns the export must contain for the report to be built.
REQUIRED_COLUMNS = ('taxon_id', 'scientific_name', 'common_name', 'user_login')


def find_uniquely_observed_species(df):
    """Group the taxa observed by exactly one person under that person's login.

    Args:
        df: Observation export as a DataFrame with at least the ``taxon_id``
            and ``user_login`` columns, one row per observation.

    Returns:
        A list of ``(user_login, [taxon_id, ...])`` tuples ordered by the
        number of uniquely observed taxa, largest first. Ties keep the order
        in which the observers' unique taxa first appear in ``df``.
    """
    # Collapse repeat observations so every (taxon, observer) pair occurs once;
    # a taxon whose id is then not repeated has exactly one observer. This
    # replaces a per-taxon scan of the whole frame with a single pass.
    pairs = df.loc[:, ['taxon_id', 'user_login']].drop_duplicates()
    has_single_observer = ~pairs.duplicated(subset='taxon_id', keep=False)
    # Rows without a taxon id or login cannot be attributed to anyone. They
    # are dropped only after the uniqueness test, so a row with a missing
    # login still counts as a second observer of its taxon.
    sole = pairs.loc[has_single_observer].dropna()

    taxa_by_observer = collections.defaultdict(list)
    for tid, observer in zip(sole['taxon_id'], sole['user_login']):
        taxa_by_observer[observer].append(tid)

    # sorted() is stable, so observers with equal counts keep insertion order.
    return sorted(taxa_by_observer.items(), key=lambda item: len(item[1]), reverse=True)


def build_species_reference(df):
    """Return one row of names per taxon, indexed by ``taxon_id``.

    Args:
        df: Observation export with the ``taxon_id``, ``scientific_name`` and
            ``common_name`` columns.

    Returns:
        A DataFrame indexed by ``taxon_id`` with the ``scientific_name`` and
        ``common_name`` columns. When a taxon appears with several spellings
        of its names, the first one encountered wins, so every lookup by id
        yields a single row.
    """
    return (
        df.loc[:, ['taxon_id', 'scientific_name', 'common_name']]
        .dropna(subset=['taxon_id'])
        .drop_duplicates(subset='taxon_id')
        .set_index('taxon_id')
    )


def render_report(sorted_observations, species):
    """Render the uniquely observed taxa as an HTML fragment.

    Args:
        sorted_observations: List of ``(user_login, [taxon_id, ...])`` tuples
            in the order they should be listed.
        species: DataFrame indexed by ``taxon_id`` with ``scientific_name``
            and ``common_name`` columns; it must contain every listed taxon.

    Returns:
        The report as a string ending in a newline. All values taken from the
        export are HTML-escaped before being inserted into the markup.
    """
    total_taxa = sum(len(taxa) for _, taxa in sorted_observations)
    parts = [
        f"<p>{total_taxa} taxa uniquely observed by {len(sorted_observations)} observers.</p>\n",
        '<p>\n',
    ]
    # Space-separated @mentions of everyone who appears in the report.
    parts.extend(f"@{html.escape(str(observer))} " for observer, _ in sorted_observations)
    parts.append('</p>\n')

    for observer, taxa in sorted_observations:
        login = html.escape(str(observer))
        parts.append(
            f'\n\n<p><a href="https://www.inaturalist.org/people/{login}">@{login}</a>'
            f' ({len(taxa)} taxa):</p><ul>\n'
        )
        for tid in taxa:
            taxon = species.loc[tid]
            taxon_ref = html.escape(str(tid))
            scientific_name = html.escape(str(taxon['scientific_name']))
            label = f"<i>{scientific_name}</i>"
            # Not every taxon has a common name; omit the parentheses if not.
            if not pd.isnull(taxon['common_name']):
                common_name = html.escape(str(taxon['common_name']))
                label += f" ({common_name})"
            parts.append(
                f'<li><a href="https://www.inaturalist.org/taxa/{taxon_ref}">{label}</a></li>\n'
            )
        parts.append('</ul>\n')

    return ''.join(parts)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Given an iNaturalist observation export, find species which were only observed by a single person.')
    parser.add_argument('export_file')
    args = parser.parse_args()

    df = pd.read_csv(args.export_file)

    # Fail with a readable message rather than a pandas KeyError traceback.
    missing = [column for column in REQUIRED_COLUMNS if column not in df.columns]
    if missing:
        parser.error(f"{args.export_file} is missing required column(s): {', '.join(missing)}")

    report = render_report(find_uniquely_observed_species(df), build_species_reference(df))
    # The report already ends with a newline.
    print(report, end='')
Barnaby Walters
Enid Walters
Grand Moff Darth Salt
yhthnh
Jan Rychter
Sophie Dennis
Andy Robinson
kiko mayorga
Joschi Kuphal 吉
TradTöchter
RustCast
TT-B
Andy 🦊🐼🦦🦡🐧🐝🐞🦋🐾🐌🕷️