Giter VIP home page Giter VIP logo

Comments (1)

danielhers avatar danielhers commented on July 20, 2024

Yes:

$ python semstr/convert.py test_files/LDC2014T12.amr -f conll
Loading spaCy model 'en_core_web_md'... Done (15.178s).                                                                                 
Converting: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:17<00:00, 17.31s/file, file=LDC2014T12.amr]
$ cat nw.wsj_000*.conll
# format = amr
# sent_id = nw.wsj_0001.1_0
1	Pierre	Pierre	PROPN	NNP	_	0	root	_	_
2	Vinken	Vinken	PROPN	NNP	_	1	Terminal	_	_
3	will	will	VERB	MD	_	0	ROOT	_	_
4	join	join	VERB	VB	_	0	root	_	_
5	the	the	DET	DT	_	0	ROOT	_	_
6	board	board	NOUN	NN	_	4	ARG1	_	_
7	as	as	ADP	IN	_	0	ROOT	_	_
8	a	a	DET	DT	_	0	ROOT	_	_
9	nonexecutive	nonexecutive	ADJ	JJ	_	10	mod	_	_
10	director	director	NOUN	NN	_	11	ARG2	_	_
11	.	.	PUNCT	.	_	0	root	_	_

# format = amr
# sent_id = nw.wsj_0001.2_0
1	Mr.	Mr.	PROPN	NNP	_	0	root	_	_
2	Vinken	Vinken	PROPN	NNP	_	1	Terminal	_	_
3	is	be	VERB	VBZ	_	0	ROOT	_	_
4	chairman	chairman	NOUN	NN	_	8	ARG2	_	_
5	of	of	ADP	IN	_	0	ROOT	_	_
6	Elsevier	Elsevier	PROPN	NNP	_	12	name	_	_
7	N.V.	N.V.	PROPN	NNP	_	6	Terminal	_	_
8	,	,	PUNCT	,	_	0	root	_	_
9	the	the	DET	DT	_	0	ROOT	_	_
10	Dutch	dutch	ADJ	JJ	_	0	root	_	_
11	publishing	publishing	NOUN	NN	_	12	ARG0-of	_	_
12	group	group	NOUN	NN	_	8	ARG1	_	_
13	.	.	PUNCT	.	_	8	U	_	_

# format = amr
# sent_id = nw.wsj_0002.1_0
1	Rudolph	Rudolph	PROPN	NNP	_	3	name	_	_
2	Agnew	Agnew	PROPN	NNP	_	1	Terminal	_	_
3	,	,	PUNCT	,	_	0	root	_	_
4	former	former	ADJ	JJ	_	22	time	_	_
5	chairman	chairman	NOUN	NN	_	22	ARG2	_	_
6	of	of	ADP	IN	_	0	ROOT	_	_
7	Consolidated	Consolidated	PROPN	NNP	_	21	name	_	_
8	Gold	Gold	PROPN	NNP	_	7	Terminal	_	_
9	Fields	Fields	PROPN	NNPS	_	7	Terminal	_	_
10	PLC	PLC	PROPN	NNP	_	7	Terminal	_	_
11	,	,	PUNCT	,	_	0	root	_	_
12	was	be	VERB	VBD	_	0	ROOT	_	_
13	named	name	VERB	VBN	_	0	ROOT	_	_
14	a	a	DET	DT	_	0	ROOT	_	_
15	nonexecutive	nonexecutive	ADJ	JJ	_	0	ROOT	_	_
16	director	director	NOUN	NN	_	11	ARG2	_	_
17	of	of	ADP	IN	_	0	ROOT	_	_
18	this	this	DET	DT	_	0	ROOT	_	_
19	British	british	ADJ	JJ	_	0	ROOT	_	_
20	industrial	industrial	ADJ	JJ	_	0	ROOT	_	_
21	conglomerate	conglomerate	NOUN	NN	_	22	ARG1	_	_
21	conglomerate	conglomerate	NOUN	NN	_	3	ARG1	_	_
22	.	.	PUNCT	.	_	0	root	_	_

# format = amr
# sent_id = nw.wsj_0003.1_0
1	A	a	DET	DT	_	0	ROOT	_	_
2	form	form	NOUN	NN	_	4	mod	_	_
3	of	of	ADP	IN	_	0	ROOT	_	_
4	asbestos	asbestos	NOUN	NN	_	0	root	_	_
5	once	once	ADV	RB	_	8	time	_	_
6	used	use	VERB	VBD	_	0	ROOT	_	_
7	to	to	PART	TO	_	0	ROOT	_	_
8	make	make	VERB	VB	_	0	root	_	_
9	Kent	Kent	PROPN	NNP	_	10	name	_	_
10	cigarette	cigarette	NOUN	NN	_	0	root	_	_
11	filters	filter	NOUN	NNS	_	0	ROOT	_	_
12	has	have	VERB	VBZ	_	0	ROOT	_	_
13	caused	cause	VERB	VBN	_	0	ROOT	_	_
14	a	a	DET	DT	_	0	ROOT	_	_
15	high	high	ADJ	JJ	_	4	ARG1	_	_
16	percentage	percentage	NOUN	NN	_	15	domain	_	_
17	of	of	ADP	IN	_	0	ROOT	_	_
18	cancer	cancer	NOUN	NN	_	0	root	_	_
19	deaths	death	NOUN	NNS	_	0	ROOT	_	_
20	among	among	ADP	IN	_	0	ROOT	_	_
21	a	a	DET	DT	_	0	ROOT	_	_
22	group	group	NOUN	NN	_	0	ROOT	_	_
23	of	of	ADP	IN	_	0	ROOT	_	_
24	workers	worker	NOUN	NNS	_	0	ROOT	_	_
25	exposed	expose	VERB	VBN	_	0	ROOT	_	_
26	to	to	ADP	IN	_	0	ROOT	_	_
27	it	-PRON-	PRON	PRP	_	0	ROOT	_	_
28	,	,	PUNCT	,	_	0	root	_	_
29	researchers	researcher	NOUN	NNS	_	0	ROOT	_	_
30	reported	report	VERB	VBD	_	0	ROOT	_	_
31	.	.	PUNCT	.	_	28	U	_	_


or, if you want CoNLL-U instead:

$ python semstr/convert.py test_files/LDC2014T12.amr -f conllu
Loading spaCy model 'en_core_web_md'... Done (15.073s).                                                                                 
Converting: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:18<00:00, 18.64s/file, file=LDC2014T12.amr]
$ cat nw.wsj_000*.conllu
# format = amr
# sent_id = nw.wsj_0001.1_0
# text = Pierre Vinken will join the board as a nonexecutive director .
# doc_id = nw.wsj_0001
1	Pierre	Pierre	PROPN	NNP	_	0	root	0:root	_
2	Vinken	Vinken	PROPN	NNP	_	1	flat	1:flat	_
3	will	will	VERB	MD	_	1	orphan	1:orphan	_
4	join	join	VERB	VB	_	1	orphan	1:orphan	_
5	the	the	DET	DT	_	1	orphan	1:orphan	_
6	board	board	NOUN	NN	_	4	ARG1	4:ARG1	_
7	as	as	ADP	IN	_	1	orphan	1:orphan	_
8	a	a	DET	DT	_	1	orphan	1:orphan	_
9	nonexecutive	nonexecutive	ADJ	JJ	_	10	mod	10:mod	_
10	director	director	NOUN	NN	_	11	ARG2	11:ARG2	_
11	.	.	PUNCT	.	_	1	punct	1:punct	_

# format = amr
# sent_id = nw.wsj_0001.2_0
# text = Mr. Vinken is chairman of Elsevier N.V. , the Dutch publishing group .
# doc_id = nw.wsj_0001
1	Mr.	Mr.	PROPN	NNP	_	0	root	0:root	_
2	Vinken	Vinken	PROPN	NNP	_	1	flat	1:flat	_
3	is	be	VERB	VBZ	_	1	orphan	1:orphan	_
4	chairman	chairman	NOUN	NN	_	8	ARG2	8:ARG2	_
5	of	of	ADP	IN	_	1	orphan	1:orphan	_
6	Elsevier	Elsevier	PROPN	NNP	_	12	name	12:name	_
7	N.V.	N.V.	PROPN	NNP	_	6	flat	6:flat	_
8	,	,	PUNCT	,	_	1	punct	1:punct	_
9	the	the	DET	DT	_	1	orphan	1:orphan	_
10	Dutch	dutch	ADJ	JJ	_	1	orphan	1:orphan	_
11	publishing	publishing	NOUN	NN	_	12	ARG0-of	12:ARG0-of	_
12	group	group	NOUN	NN	_	8	ARG1	8:ARG1	_
13	.	.	PUNCT	.	_	8	punct	8:punct	_

# format = amr
# sent_id = nw.wsj_0002.1_0
# text = Rudolph Agnew , former chairman of Consolidated Gold Fields PLC , was named a nonexecutive director of this British industrial conglomerate .
# doc_id = nw.wsj_0002
1	Rudolph	Rudolph	PROPN	NNP	_	3	name	3:name	_
2	Agnew	Agnew	PROPN	NNP	_	1	flat	1:flat	_
3	,	,	PUNCT	,	_	0	root	0:root	_
4	former	former	ADJ	JJ	_	22	time	22:time	_
5	chairman	chairman	NOUN	NN	_	22	ARG2	22:ARG2	_
6	of	of	ADP	IN	_	3	orphan	3:orphan	_
7	Consolidated	Consolidated	PROPN	NNP	_	21	name	21:name	_
8	Gold	Gold	PROPN	NNP	_	7	flat	7:flat	_
9	Fields	Fields	PROPN	NNPS	_	7	flat	7:flat	_
10	PLC	PLC	PROPN	NNP	_	7	flat	7:flat	_
11	,	,	PUNCT	,	_	3	punct	3:punct	_
12	was	be	VERB	VBD	_	3	orphan	3:orphan	_
13	named	name	VERB	VBN	_	3	orphan	3:orphan	_
14	a	a	DET	DT	_	3	orphan	3:orphan	_
15	nonexecutive	nonexecutive	ADJ	JJ	_	3	orphan	3:orphan	_
16	director	director	NOUN	NN	_	11	ARG2	11:ARG2	_
17	of	of	ADP	IN	_	3	orphan	3:orphan	_
18	this	this	DET	DT	_	3	orphan	3:orphan	_
19	British	british	ADJ	JJ	_	3	orphan	3:orphan	_
20	industrial	industrial	ADJ	JJ	_	3	orphan	3:orphan	_
21	conglomerate	conglomerate	NOUN	NN	_	22	ARG1	22:ARG1|3:ARG1	_
22	.	.	PUNCT	.	_	3	punct	3:punct	_

# format = amr
# sent_id = nw.wsj_0003.1_0
# text = A form of asbestos once used to make Kent cigarette filters has caused a high percentage of cancer deaths among a group of workers exposed to it , researchers reported .
# doc_id = nw.wsj_0003
1	A	a	DET	DT	_	4	orphan	4:orphan	_
2	form	form	NOUN	NN	_	4	mod	4:mod	_
3	of	of	ADP	IN	_	4	orphan	4:orphan	_
4	asbestos	asbestos	NOUN	NN	_	0	root	0:root	_
5	once	once	ADV	RB	_	8	time	8:time	_
6	used	use	VERB	VBD	_	4	orphan	4:orphan	_
7	to	to	PART	TO	_	4	orphan	4:orphan	_
8	make	make	VERB	VB	_	4	orphan	4:orphan	_
9	Kent	Kent	PROPN	NNP	_	10	name	10:name	_
10	cigarette	cigarette	NOUN	NN	_	4	orphan	4:orphan	_
11	filters	filter	NOUN	NNS	_	4	orphan	4:orphan	_
12	has	have	VERB	VBZ	_	4	orphan	4:orphan	_
13	caused	cause	VERB	VBN	_	4	orphan	4:orphan	_
14	a	a	DET	DT	_	4	orphan	4:orphan	_
15	high	high	ADJ	JJ	_	4	ARG1	4:ARG1	_
16	percentage	percentage	NOUN	NN	_	15	domain	15:domain	_
17	of	of	ADP	IN	_	4	orphan	4:orphan	_
18	cancer	cancer	NOUN	NN	_	4	orphan	4:orphan	_
19	deaths	death	NOUN	NNS	_	4	orphan	4:orphan	_
20	among	among	ADP	IN	_	4	orphan	4:orphan	_
21	a	a	DET	DT	_	4	orphan	4:orphan	_
22	group	group	NOUN	NN	_	4	orphan	4:orphan	_
23	of	of	ADP	IN	_	4	orphan	4:orphan	_
24	workers	worker	NOUN	NNS	_	4	orphan	4:orphan	_
25	exposed	expose	VERB	VBN	_	4	orphan	4:orphan	_
26	to	to	ADP	IN	_	4	orphan	4:orphan	_
27	it	-PRON-	PRON	PRP	_	4	orphan	4:orphan	_
28	,	,	PUNCT	,	_	4	punct	4:punct	_
29	researchers	researcher	NOUN	NNS	_	4	orphan	4:orphan	_
30	reported	report	VERB	VBD	_	4	orphan	4:orphan	_
31	.	.	PUNCT	.	_	28	punct	28:punct	_

from semstr.

Related Issues (3)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents code.