| # |
| # CDDL HEADER START |
| # |
| # The contents of this file are subject to the terms of the |
| # Common Development and Distribution License (the "License"). |
| # You may not use this file except in compliance with the License. |
| # |
| # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| # or http://www.opensolaris.org/os/licensing. |
| # See the License for the specific language governing permissions |
| # and limitations under the License. |
| # |
| # When distributing Covered Code, include this CDDL HEADER in each |
| # file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| # If applicable, add the following below this CDDL HEADER, with the |
| # fields enclosed by brackets "[]" replaced with your own identifying |
| # information: Portions Copyright [yyyy] [name of copyright owner] |
| # |
| # CDDL HEADER END |
| # |
| |
| # |
| # Copyright 2016 Joyent, Inc. |
| # |
| |
| import re, sys |
| |
# Report templates.  Both are filled with, in order:
# (file name, line number, offending text, suggested replacement).
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
altMsg = (
    '%s: Line %d contains "%s"; please use "%s" instead '
    'for consistency with other documentation\n'
)
| |
# Table of common misspellings.
#
# Each key is the text to look for; later in this file every key is compiled
# into a case-insensitive regular expression anchored with \b on both sides.
# Each value is the text reported to the user as the suggested correction;
# where several corrections are plausible the value lists them separated by
# commas.  Keys containing a space (e.g. 'the the') catch accidentally
# repeated or run-together words.
misspellings = {
    'absense': 'absence',
    'accessable': 'accessible',
    'accomodate': 'accommodate',
    'accomodation': 'accommodation',
    'accross': 'across',
    'acheive': 'achieve',
    'addional': 'additional',
    'addres': 'address',
    'admininistrative': 'administrative',
    'adminstered': 'administered',
    'adminstrate': 'administrate',
    'adminstration': 'administration',
    'adminstrative': 'administrative',
    'adminstrator': 'administrator',
    'admissability': 'admissibility',
    'adress': 'address',
    'adressable': 'addressable',
    'adressed': 'addressed',
    'adressing': 'addressing, dressing',
    'aginst': 'against',
    'agression': 'aggression',
    'agressive': 'aggressive',
    'alot': 'a lot, allot',
    'and and': 'and',
    'apparantly': 'apparently',
    'appearence': 'appearance',
    'arguement': 'argument',
    'assasination': 'assassination',
    'auxilliary': 'auxiliary',
    'basicly': 'basically',
    'begining': 'beginning',
    'belive': 'believe',
    'beteen': 'between',
    'betwen': 'between',
    'beween': 'between',
    'bewteen': 'between',
    'bizzare': 'bizarre',
    'buisness': 'business',
    'calender': 'calendar',
    'cemetary': 'cemetery',
    'chauffer': 'chauffeur',
    'collegue': 'colleague',
    'comming': 'coming',
    'commited': 'committed',
    'commitee': 'committee',
    'commiting': 'committing',
    'comparision': 'comparison',
    'comparisions': 'comparisons',
    'compatability': 'compatibility',
    'compatable': 'compatible',
    'compatablity': 'compatibility',
    'compatiable': 'compatible',
    'compatiblity': 'compatibility',
    'completly': 'completely',
    'concious': 'conscious',
    'condidtion': 'condition',
    'conected': 'connected',
    'conjuction': 'conjunction',
    'continous': 'continuous',
    'curiousity': 'curiosity',
    'deamon': 'daemon',
    'definately': 'definitely',
    'desireable': 'desirable',
    'diffrent': 'different',
    'dilemna': 'dilemma',
    'dissapear': 'disappear',
    'dissapoint': 'disappoint',
    'ecstacy': 'ecstasy',
    'embarass': 'embarrass',
    'enviroment': 'environment',
    'exept': 'except',
    'existance': 'existence',
    'familar': 'familiar',
    'finaly': 'finally',
    'folowing': 'following',
    'foriegn': 'foreign',
    'forseeable': 'foreseeable',
    'fourty': 'forty',
    'foward': 'forward',
    'freind': 'friend',
    'futher': 'further',
    'gaurd': 'guard',
    'glamourous': 'glamorous',
    'goverment': 'government',
    'happend': 'happened',
    'harrassment': 'harassment',
    'hierachical': 'hierarchical',
    'hierachies': 'hierarchies',
    'hierachy': 'hierarchy',
    'hierarcical': 'hierarchical',
    'hierarcy': 'hierarchy',
    'honourary': 'honorary',
    'humourous': 'humorous',
    'idiosyncracy': 'idiosyncrasy',
    'immediatly': 'immediately',
    'inaccessable': 'inaccessible',
    'inbetween': 'between',
    'incidently': 'incidentally',
    'independant': 'independent',
    'infomation': 'information',
    'interupt': 'interrupt',
    'intial': 'initial',
    'intially': 'initially',
    'irresistable': 'irresistible',
    'jist': 'gist',
    'knowlege': 'knowledge',
    'lenght': 'length',
    'liase': 'liaise',
    'liason': 'liaison',
    'libary': 'library',
    'maching': 'machine, marching, matching',
    'millenia': 'millennia',
    'millenium': 'millennium',
    'neccessary': 'necessary',
    'negotation': 'negotiation',
    'nontheless': 'nonetheless',
    'noticable': 'noticeable',
    'occassion': 'occasion',
    'occassional': 'occasional',
    'occassionally': 'occasionally',
    'occurance': 'occurrence',
    'occured': 'occurred',
    'occurence': 'occurrence',
    'occuring': 'occurring',
    'ommision': 'omission',
    'orginal': 'original',
    'orginally': 'originally',
    'ouput': 'output',
    'overriden': 'overridden',
    'particuliar': 'particular',
    'pavillion': 'pavilion',
    'peice': 'piece',
    'persistant': 'persistent',
    'politican': 'politician',
    'posession': 'possession',
    'possiblity': 'possibility',
    'preceed': 'precede',
    'preceeded': 'preceded',
    'preceeding': 'preceding',
    'preceeds': 'precedes',
    'prefered': 'preferred',
    'prefering': 'preferring',
    'presense': 'presence',
    'proces': 'process',
    'propoganda': 'propaganda',
    'psuedo': 'pseudo',
    'publically': 'publicly',
    'realy': 'really',
    'reciept': 'receipt',
    'recieve': 'receive',
    'recieved': 'received',
    'reciever': 'receiver',
    'recievers': 'receivers',
    'recieves': 'receives',
    'recieving': 'receiving',
    'recomend': 'recommend',
    'recomended': 'recommended',
    'recomending': 'recommending',
    'recomends': 'recommends',
    'recurse': 'recur',
    'recurses': 'recurs',
    'recursing': 'recurring',
    'refered': 'referred',
    'refering': 'referring',
    'religous': 'religious',
    'rember': 'remember',
    'remeber': 'remember',
    'repetion': 'repetition',
    'reponsible': 'responsible',
    'resistence': 'resistance',
    'retreive': 'retrieve',
    'seige': 'siege',
    'sence': 'since',
    'seperate': 'separate',
    'seperated': 'separated',
    'seperately': 'separately',
    'seperates': 'separates',
    'similiar': 'similar',
    'somwhere': 'somewhere',
    'sould': 'could, should, sold, soul',
    'sturcture': 'structure',
    'succesful': 'successful',
    'succesfully': 'successfully',
    'successfull': 'successful',
    'sucessful': 'successful',
    'supercede': 'supersede',
    'supress': 'suppress',
    'supressed': 'suppressed',
    'suprise': 'surprise',
    'suprisingly': 'surprisingly',
    'sytem': 'system',
    'tendancy': 'tendency',
    'the the': 'the',
    'the these': 'these',
    'therefor': 'therefore',
    'threshhold': 'threshold',
    'tolerence': 'tolerance',
    'tommorow': 'tomorrow',
    'tommorrow': 'tomorrow',
    'tounge': 'tongue',
    'tranformed': 'transformed',
    'transfered': 'transferred',
    'truely': 'truly',
    'trustworthyness': 'trustworthiness',
    'uncommited': 'uncommitted',
    'unforseen': 'unforeseen',
    'unfortunatly': 'unfortunately',
    'unsuccessfull': 'unsuccessful',
    'untill': 'until',
    'upto': 'up to',
    'whereever': 'wherever',
    'wich': 'which',
    'wierd': 'weird',
    'wtih': 'with',
}
| |
# Spellings that are not wrong but are discouraged: each key is the form to
# flag and each value is the preferred form reported to the user.
alternates = {
    'parseable': 'parsable',
    'sub-command': 'subcommand',
    'sub-commands': 'subcommands',
    'writeable': 'writable',
}
| |
# Pre-compiled lookup tables.  Each element is a
# (compiled regex, text searched for, suggested replacement) triple; the
# regex matches the text case-insensitively and only between word boundaries.
misspellingREs = []
alternateREs = []

# dict.items() is available on both Python 2 and Python 3.  The previous
# dict.iteritems() call does not exist on Python 3 and made importing this
# module fail there with AttributeError.
for misspelling, correct in misspellings.items():
    regex = re.compile(r'\b%s\b' % (misspelling), re.IGNORECASE)
    entry = (regex, misspelling, correct)
    misspellingREs.append(entry)

for alternate, correct in alternates.items():
    regex = re.compile(r'\b%s\b' % (alternate), re.IGNORECASE)
    entry = (regex, alternate, correct)
    alternateREs.append(entry)
| |
def check(errmsg, output, filename, line, lineno, entry):
    """Report one table entry against one line of text.

    errmsg   -- format string taking (filename, lineno, found, suggestion)
    output   -- object with a write() method that receives the report
    filename -- name used in the report
    line     -- text to search
    lineno   -- line number used in the report
    entry    -- sequence whose items 0, 1 and 2 are the compiled pattern,
                the text it looks for, and the suggested replacement

    Returns 1 if the pattern was found in the line (after writing the
    report to output), otherwise 0.
    """
    pattern = entry[0]
    if not pattern.search(line):
        return 0

    report = errmsg % (filename, lineno, entry[1], entry[2])
    output.write(report)
    return 1
| |
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Scan an open file for known misspellings and discouraged spellings.

    fh       -- file object; it is rewound with seek(0) and read line by line
    filename -- name used in reports; when false, fh.name is used instead
    output   -- object with a write() method that receives the reports
    opts     -- extra keyword arguments are accepted and ignored

    For every line, each misspelling pattern is tried first and then each
    alternate-spelling pattern; one report is written per matching pattern.

    Returns 1 if anything was reported, otherwise 0.
    """
    if not filename:
        filename = fh.name

    # Pair each message template with its table, in reporting order.
    passes = (
        (spellMsg, misspellingREs),
        (altMsg, alternateREs),
    )

    found = 0
    fh.seek(0)
    for lineno, line in enumerate(fh, 1):
        for errmsg, table in passes:
            for entry in table:
                found |= check(errmsg, output, filename, line,
                    lineno, entry)

    return found