618. USPTO - Explainable AI for Patent Professionals | uspto-explainable-ai
首先,感谢组织者举办这次比赛,也感谢我的队友 @sega1031。
| 实验 | TP | FP | 理论排行榜分数 | 实际排行榜分数 |
|---|---|---|---|---|
| 1 | 1 | 0 | 0.089 | 0.08 |
| 2 | 2 | 0 | 0.159 | 0.15 |
| 候选 | tp_cover | tp | global_count | fp |
|---|---|---|---|---|
| ti:device | {0,1,2} | len({0,1,2}) = 3 | 4 | 4 - 3 = 1 |
| clm:invention | {0,2,3,4,5} | 5 | 7 | 2 |
| detd:method detd:device | {6} | 1 | 1 | 0 |
`ti:device OR clm:invention OR (detd:method detd:device)`

从 50 个邻居中选择两个专利。
| publication_number | ti | abst |
|---|---|---|
| US-0000-A | dog cat fox | pig cow |
| US-0001-A | cat fox | duck frog cow |
ti:cat AND ti:fox AND ab:cow

| 候选 | tp_cover | tp | IDF_sum | fp |
|---|---|---|---|---|
| ti:cat ti:fox ab:cow | {0,1} | 2 | 103.24 | 0 |
def count_query_tokens(query: str) -> int:
    """Return the number of tokens in ``query``.

    The query is split on every single whitespace character, ``+``, ``(`` and
    ``)``, and the non-empty pieces are counted.  Inside the character class
    the ``+`` is a literal plus sign, not a quantifier.  Quotes, colons and
    ``@`` are not separators, so terms written without any whitespace between
    them (e.g. ``ti:"dog"ti:"cat"``) count as a single token.

    Args:
        query: The query string to measure.

    Returns:
        The count of non-empty pieces after splitting; ``0`` for an empty
        string.
    """
    # Raw string: "\s" inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    return len([piece for piece in re.split(r"[\s+()]", query) if piece])
普通 AND
# Ordinary AND: two whitespace-separated terms on the same field.
query = "ti:dog ti:cat"
# Token count reported by whoosh_utils.count_query_tokens for this query.
num_tokens = whoosh_utils.count_query_tokens(query)
print("num_tokens:", num_tokens)
# Parse the same string with the parser returned by whoosh_utils. The result
# is neither assigned nor printed here; presumably it is shown as the
# notebook cell's output.
qp = whoosh_utils.get_query_parser()
qp.parse(query)
num_tokens: 2
And([Term('ti', 'dog'), Term('ti', 'cat')])
特殊 AND
# No whitespace between the two quoted terms, so a counter that splits on
# whitespace/"+"/parentheses sees one token, while the parse shown below is
# still an And of both terms.
query = 'ti:"dog"ti:"cat"'
num_tokens: 1
And([Term('ti', 'dog'), Term('ti', 'cat')])
普通短语
# Ordinary phrase: nine whitespace-separated words inside one quoted phrase.
query = 'ti:"The quick brown fox jumps over the lazy dog"'
num_tokens: 9
Phrase('ti', ['quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog'], slop=1, boost=1.000000)
特殊短语
# Same phrase with "@" in place of every space, leaving no whitespace for a
# whitespace-based token counter to split on.
query = 'ti:"The@quick@brown@fox@jumps@over@the@lazy@dog"'
num_tokens: 1
Phrase('ti', ['quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog'], slop=1, boost=1.000000)
| 候选 | tp_cover | fp |
|---|---|---|
| ti:device | {0, 1, 2, 3, 4, 5, 6} | 3 |
| ti:"dog"ti:"cat" | {0, 1, 2, 7} | 1 |
| clm:"dissociation"ab:"proteins" | {3, 4, 10} | 0 |
| ... | ... | ... |
10xTP − FP。(cpc:"B60P1/165"cpc:"E02F3/3483"cpc:"B60P1/165"cpc:"E02F3/3486") OR (cpc:"B60D1/26"cpc:"B62D49/04"cpc:"B60D1/26"cpc:"E02F3/6472"cpc:"B60D1/26"cpc:"E02F3/655"cpc:"B62D49/04"cpc:"E02F3/64"cpc:"B62D49/04"cpc:"E02F3/6472"cpc:"B62D49/04"cpc:"E02F3/655"cpc:"B62D49/04"cpc:"E02F9/2016"cpc:"E02F3/6472"cpc:"E02F9/2016"cpc:"E02F3/655"cpc:"E02F9/2016"detd:"courli"detd:"lgayea") OR (cpc:"B65F2003/0283"cpc:"E02F3/3486"cpc:"B65F3/046"cpc:"E02F3/3486"ti:"end@loader@actuating"ti:"loader@actuating"ti:"loader@actuating@mechanism"ti:"actuating@mechanism@dump"cpc:"B65F2003/0283"cpc:"B65F3/046"detd:"horbe") OR (detd:"wharfpthe"ti:"dumping@scoop"ti:"side@dumping@scoop"detd:"hatchof") OR (detd:"powerswung") OR (detd:"shlppee") OR (detd:"0rneypearce"detd:"schaepcrklaus"ti:"as@asphalt@or"ti:"improvement@therein@laying"ti:"laying@surfacing"ti:"laying@surfacing@material"ti:"machine@and@improvement"ti:"surfacing@material@as"ti:"therein@laying"ti:"therein@laying@surfacing"detd:"comprlsesr") OR (detd:"disswingable"detd:"rdlyonto"detd:"sitionedsymmetrically"detd:"understandingflof"detd:"opjusting") OR (ti:"mechanical@shoveling@machine"ti:"mechanical@shoveling") OR ((detd:"tractors"detd:"ravity"detd:"scrapers"detd:"kick"cpc:"E02F3/6472"ti:"scraper"cpc:"E02F3/656"detd:"apron"detd:"scraper"detd:"hingedly"detd:"tractor"detd:"sheave"detd:"sheaves"detd:"cooperates"detd:"dead"detd:"axle")) OR ((detd:"oor"detd:"exible"detd:"retracting"detd:"trough"detd:"retracted"detd:"turntable"detd:"underground"detd:"jacks"detd:"therealong"detd:"rectilinear"detd:"propelling"detd:"conveyor"detd:"extensible"detd:"slip"detd:"clutch"detd:"mines"detd:"adjustably"detd:"engageable"detd:"elevating"detd:"hydraulic")) OR ((detd:"shovelling"detd:"tom"detd:"cushioned"detd:"compel"detd:"swiveled"detd:"abruptly"detd:"swivelly"detd:"wardly"detd:"hose"detd:"guideway"detd:"swivelled"detd:"undesired"detd:"sup"detd:"ported"detd:"swivel"detd:"osgood"detd:"pile"detd:"muck"detd:"guideways"detd:"dig")) OR 
((detd:"planetaries"detd:"payed"detd:"planetary"detd:"mosier"detd:"fiexible"detd:"2li"detd:"conveyer"detd:"tractive"detd:"lil"detd:"ior"detd:"exible"detd:"2s"detd:"yieldable"detd:"tunnels"detd:"compensating"detd:"excepting"detd:"lll"detd:"chute"detd:"illinois"detd:"geared")) OR ((detd:"fioor"detd:"simmons"detd:"overlie"detd:"hydraulically"detd:"attachable"detd:"swivelly"detd:"coal"detd:"jacks"detd:"swivel"detd:"anchor"detd:"joy"detd:"guideways"detd:"unison"detd:"propelling"detd:"pennsylvania"detd:"mining"detd:"extensible"detd:"transporting"detd:"tilted"detd:"elevating")) OR ((detd:"vfiled"detd:"communicable"detd:"eiect"detd:"hydraulically"detd:"sullivan"ti:"material"detd:"propulsion"detd:"machinery"detd:"claremont"detd:"bores"detd:"uid"detd:"pile"detd:"muck"detd:"lin"detd:"dig"detd:"trackway"detd:"conduits"detd:"5i"detd:"massachusetts"detd:"hydraulic")) OR ((detd:"i23"detd:"movementof"detd:"i26"detd:"insides"detd:"encircles"detd:"compactness"detd:"h2"detd:"cushioning"detd:"yieldably"detd:"reversely"detd:"pivoting"detd:"cams"detd:"mucking"detd:"abuts"detd:"lug"detd:"therealong"detd:"teeth"detd:"axles"detd:"scoop"detd:"nuts")) OR ((detd:"foolproof"detd:"selfcentering"detd:"impetus"detd:"i85"detd:"coaction"detd:"rockers"detd:"pinned"detd:"maxson"detd:"i00"detd:"pivotable"detd:"i05"detd:"fork"detd:"inadequate"detd:"dipper"detd:"compelling"detd:"plungers"detd:"incapable"detd:"h5"detd:"abrupt"detd:"tang")) OR ((detd:"evidently"detd:"shank"detd:"receivable"detd:"hoses"detd:"venting"detd:"urges"detd:"interrupting"detd:"vented"detd:"hose"detd:"rolls"detd:"inactive"detd:"claremont"detd:"interrupted"detd:"3i"detd:"joy"detd:"guideways"detd:"trackway"detd:"pennsylvania"detd:"embodies"detd:"assumes")) OR ((detd:"seam"detd:"bevel"detd:"brake"detd:"rocked"detd:"wheeled"detd:"propulsion"detd:"spur"detd:"rst"detd:"coal"detd:"pinion"detd:"withdrawal"detd:"gearing"detd:"shafts"detd:"extensible"detd:"clutch"detd:"keyed"detd:"elevating"detd:"hydraulic")) OR 
((detd:"draulic"detd:"hy"detd:"4s"detd:"vfor"detd:"oor"detd:"zontal"detd:"withdrawing"detd:"ie"detd:"andv"detd:"progresses"detd:"mw"detd:"lo"detd:"anchor"detd:"teeth"detd:"conveyor"detd:"extremity"detd:"3l"detd:"hydraulic")) OR ((detd:"isv"detd:"isprovided"detd:"sion"detd:"ropes"detd:"rope"detd:"suddenly"detd:"vof"detd:"relied"detd:"thel"detd:"urge"detd:"anda"detd:"4l"detd:"0f"detd:"gearing"detd:"anchored"detd:"transportation"detd:"mining"detd:"lthe"detd:"transporting")) OR ((detd:"posltion"detd:"motive"detd:"unwind"detd:"drifts"detd:"rearmost"detd:"tunnels"detd:"cated"detd:"segmental"detd:"injury"detd:"imparting"detd:"pile"detd:"ward"detd:"ap"detd:"scoop"detd:"swings"detd:"mines"detd:"reversible")) OR ((detd:"adaptedv"detd:"grooved"detd:"ore"detd:"chine"detd:"vhen"detd:"t0"detd:"opera"detd:"vand"detd:"sidewise"detd:"wheeled"detd:"thev"detd:"meshes"detd:"andv"cpc:"E02F9/022"detd:"movably"detd:"idler"detd:"coal"detd:"pinion"detd:"casting"detd:"bears")) OR ((detd:"compel"detd:"abruptly"detd:"tensioned"detd:"swivelled"detd:"coincident"detd:"assured"detd:"turntable"detd:"claremont"detd:"swivel"detd:"osgood"detd:"fulcrum"detd:"loader"detd:"guideways"cpc:"E02F3/3486"detd:"trackway"detd:"propelling"detd:"rolling"detd:"assumes"detd:"alinement"detd:"swings")) OR ((detd:"hoists"cpc:"E02F3/657"detd:"hoist"cpc:"E02F3/656"detd:"medial"detd:"bumper"detd:"trunnions"detd:"trunnion"detd:"grading"detd:"brake"detd:"steering"detd:"spreading"detd:"tractor"detd:"propulsion"detd:"coacting"detd:"extremities"detd:"gearing"detd:"rail"detd:"dump"detd:"clutch"))
编辑:2024/08/06
我们发布了我们的代码。
- 带魔法技巧的代码 (with magic):https://www.kaggle.com/code/iiyamaiiyama/uspto-5th-place-submission-with-magic
- 不带魔法技巧的代码 (without magic):https://www.kaggle.com/code/iiyamaiiyama/uspto-5th-place-submission-without-magic
- 全局计数器 (标题):https://www.kaggle.com/code/sega1031/uspto-global-title-word-counter
- 全局计数器 (合并):https://www.kaggle.com/code/sega1031/uspto-global-counters-limit30