Python API Reference
This guide covers using OneCite as a Python library in your own code.
Basic Usage
Simple Citation Processing
from onecite import process_references
# Process a simple reference
result = process_references(
input_content="10.1038/nature14539",
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=lambda candidates: 0 # Auto-select first match
)
# Print results
for citation in result['results']:
print(citation)
The Result Dictionary
The process_references() function returns a dictionary containing:
- results (List[str]): List of formatted citation strings
- report (dict): Processing report with the following keys:
  - total (int): Total number of entries processed
  - succeeded (int): Number of successfully processed entries
  - failed_entries (List[Dict]): List of failed entries with error details
result = process_references(
input_content="10.1038/nature14539",
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=lambda candidates: 0
)
print(f"Total: {result['report']['total']}")
print(f"Succeeded: {result['report']['succeeded']}")
print(f"Failed: {len(result['report']['failed_entries'])}")
Processing Different Input Formats
Plain Text Input
from onecite import process_references
txt_content = """
10.1038/nature14539
Vaswani et al., 2017, Attention is all you need
Smith (2020) Neural Architecture Search
"""
result = process_references(
input_content=txt_content,
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=lambda candidates: 0
)
# Access results
print('\n\n'.join(result['results']))
BibTeX Input
from onecite import process_references
bibtex_content = """
@article{LeCun2015,
title = {Deep Learning},
author = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
journal = {Nature},
year = {2015}
}
"""
result = process_references(
input_content=bibtex_content,
input_type="bib",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=lambda candidates: 0
)
print('\n\n'.join(result['results']))
Output Formats
# BibTeX format
result = process_references(
input_content="10.1038/nature14539",
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=lambda candidates: 0
)
# APA format
result = process_references(
input_content="10.1038/nature14539",
input_type="txt",
template_name="journal_article_full",
output_format="apa",
interactive_callback=lambda candidates: 0
)
# MLA format
result = process_references(
input_content="10.1038/nature14539",
input_type="txt",
template_name="journal_article_full",
output_format="mla",
interactive_callback=lambda candidates: 0
)
Interactive Selection with Callbacks
For handling ambiguous references programmatically, use a callback function:
from onecite import process_references
def auto_select_best(candidates):
"""Always select the first (best match) candidate"""
return 0 # Return the index of the selected candidate (0-based)
result = process_references(
input_content="Deep learning Hinton",
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=auto_select_best
)
print('\n\n'.join(result['results']))
Custom Callback Logic
def smart_selector(candidates):
"""Select candidate with most complete metadata"""
best_idx = 0
best_score = 0
for idx, candidate in enumerate(candidates):
# Score based on number of fields
score = sum(1 for v in candidate.values() if v)
if score > best_score:
best_score = score
best_idx = idx
return best_idx
result = process_references(
input_content="Deep learning nature 2015",
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=smart_selector
)
print('\n\n'.join(result['results']))
Advanced Data Structures
OneCite defines three TypedDict classes representing different stages of the processing pipeline:
RawEntry
A TypedDict representing an unprocessed reference entry (Stage 1):
from onecite import RawEntry
from typing import Dict, Any, Optional
# RawEntry is a TypedDict with these fields:
entry: RawEntry = {
'id': 1,
'raw_text': "10.1038/nature14539",
'doi': "10.1038/nature14539",
'url': None,
'query_string': None,
'original_entry': None
}
IdentifiedEntry
A TypedDict representing an entry after identification from data sources (Stage 2):
from onecite import IdentifiedEntry
# IdentifiedEntry includes fields like:
# id, raw_text, doi, arxiv_id, url, metadata, status
CompletedEntry
A TypedDict representing a fully processed entry with all metadata (Stage 3):
from onecite import CompletedEntry
# CompletedEntry includes fields like:
# id, doi, status, bib_key, bib_data
Note: These are TypedDict classes without methods. They are primarily used internally by the pipeline. Most users should interact with OneCite through the process_references() function.
Working with Templates
Load and inspect templates:
from onecite import TemplateLoader
loader = TemplateLoader()
# Load a specific template
template = loader.load_template("journal_article_full")
print(f"Template name: {template['name']}")
print(f"Entry type: {template['entry_type']}")
print(f"Fields: {[f['name'] for f in template['fields']]}")
# Use a custom templates directory
custom_loader = TemplateLoader(templates_dir="/path/to/templates")
custom_template = custom_loader.load_template("my_template")
Using the Pipeline Controller
For advanced use cases requiring more control over the processing pipeline:
from onecite import PipelineController
# Create controller (optionally enable Google Scholar)
controller = PipelineController(use_google_scholar=False)
# Process with full control
result = controller.process(
input_content="10.1038/nature14539",
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=lambda candidates: 0
)
print('\n\n'.join(result['results']))
Note: Most users should use process_references() instead, which is simpler and provides the same functionality.
Error Handling
Handling Exceptions
from onecite import process_references, ValidationError, ParseError
try:
result = process_references(
input_content="invalid_reference",
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=lambda candidates: 0
)
except ValidationError as e:
print(f"Validation error: {e}")
except ParseError as e:
print(f"Parse error: {e}")
except Exception as e:
print(f"Processing error: {e}")
Processing Files
Reading from File
from onecite import process_references
# Read from file
with open("references.txt", "r", encoding="utf-8") as f:
content = f.read()
result = process_references(
input_content=content,
input_type="txt",
template_name="journal_article_full",
output_format="bibtex",
interactive_callback=lambda candidates: 0
)
# Write to file
output_content = '\n\n'.join(result['results'])
with open("output.bib", "w", encoding="utf-8") as f:
f.write(output_content)
Complete Example
from onecite import process_references
# Read references
with open("my_references.txt", "r", encoding="utf-8") as f:
references = f.read()
# Process with APA format
result = process_references(
input_content=references,
input_type="txt",
template_name="journal_article_full",
output_format="apa",
interactive_callback=lambda candidates: 0 # Auto-select first match
)
# Check results
report = result['report']
print(f"Total entries: {report['total']}")
print(f"Successfully processed: {report['succeeded']}")
print(f"Failed: {len(report['failed_entries'])}")
if report['failed_entries']:
print("\nFailed entries:")
for failed in report['failed_entries']:
print(f" - Entry {failed['id']}: {failed.get('error', 'Unknown error')}")
# Save output
output_content = '\n\n'.join(result['results'])
with open("formatted_refs.txt", "w", encoding="utf-8") as f:
f.write(output_content)
print("\nDone!")
API Reference
See Core API Reference for the complete API documentation.
Next Steps
Read AI Assistant Integration with MCP to use OneCite from AI assistants
Explore Custom Templates for custom formatting
Check Frequently Asked Questions (FAQ) for common questions